diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -504,13 +504,6 @@ const VPIteration &Instance, bool IfPredicateInstr, VPTransformState &State); - /// Widen an integer or floating-point induction variable \p IV. If \p Trunc - /// is provided, the integer induction variable will first be truncated to - /// the corresponding type. \p CanonicalIV is the scalar value generated for - /// the canonical induction variable. - void widenIntOrFpInduction(PHINode *IV, VPWidenIntOrFpInductionRecipe *Def, - VPTransformState &State, Value *CanonicalIV); - /// Construct the vector value of a scalarized value \p V one lane at a time. void packScalarIntoVectorValue(VPValue *Def, const VPIteration &Instance, VPTransformState &State); @@ -611,16 +604,6 @@ /// represented as. void truncateToMinimalBitwidths(VPTransformState &State); - /// Create a vector induction phi node based on an existing scalar one. \p - /// EntryVal is the value from the original loop that maps to the vector phi - /// node, and \p Step is the loop-invariant step. If \p EntryVal is a - /// truncate instruction, instead of widening the original IV, we widen a - /// version of the IV truncated to \p EntryVal's type. - void createVectorIntOrFpInductionPHI(const InductionDescriptor &II, - Value *Step, Value *Start, - Instruction *EntryVal, VPValue *Def, - VPTransformState &State); - /// Returns (and creates if needed) the original loop trip count. 
Value *getOrCreateTripCount(Loop *NewLoop); @@ -2373,89 +2356,6 @@ return Builder.CreateBinOp(BinOp, Val, MulOp, "induction"); } -void InnerLoopVectorizer::createVectorIntOrFpInductionPHI( - const InductionDescriptor &II, Value *Step, Value *Start, - Instruction *EntryVal, VPValue *Def, VPTransformState &State) { - IRBuilderBase &Builder = State.Builder; - assert((isa(EntryVal) || isa(EntryVal)) && - "Expected either an induction phi-node or a truncate of it!"); - - // Construct the initial value of the vector IV in the vector loop preheader - auto CurrIP = Builder.saveIP(); - Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator()); - if (isa(EntryVal)) { - assert(Start->getType()->isIntegerTy() && - "Truncation requires an integer type"); - auto *TruncType = cast(EntryVal->getType()); - Step = Builder.CreateTrunc(Step, TruncType); - Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType); - } - - Value *Zero = getSignedIntOrFpConstant(Start->getType(), 0); - Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start); - Value *SteppedStart = getStepVector( - SplatStart, Zero, Step, II.getInductionOpcode(), State.VF, State.Builder); - - // We create vector phi nodes for both integer and floating-point induction - // variables. Here, we determine the kind of arithmetic we will perform. - Instruction::BinaryOps AddOp; - Instruction::BinaryOps MulOp; - if (Step->getType()->isIntegerTy()) { - AddOp = Instruction::Add; - MulOp = Instruction::Mul; - } else { - AddOp = II.getInductionOpcode(); - MulOp = Instruction::FMul; - } - - // Multiply the vectorization factor by the step using integer or - // floating-point arithmetic as appropriate. 
- Type *StepType = Step->getType(); - Value *RuntimeVF; - if (Step->getType()->isFloatingPointTy()) - RuntimeVF = getRuntimeVFAsFloat(Builder, StepType, State.VF); - else - RuntimeVF = getRuntimeVF(Builder, StepType, State.VF); - Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF); - - // Create a vector splat to use in the induction update. - // - // FIXME: If the step is non-constant, we create the vector splat with - // IRBuilder. IRBuilder can constant-fold the multiply, but it doesn't - // handle a constant vector splat. - Value *SplatVF = isa(Mul) - ? ConstantVector::getSplat(State.VF, cast(Mul)) - : Builder.CreateVectorSplat(State.VF, Mul); - Builder.restoreIP(CurrIP); - - // We may need to add the step a number of times, depending on the unroll - // factor. The last of those goes into the PHI. - PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind", - &*LoopVectorBody->getFirstInsertionPt()); - VecInd->setDebugLoc(EntryVal->getDebugLoc()); - Instruction *LastInduction = VecInd; - for (unsigned Part = 0; Part < UF; ++Part) { - State.set(Def, LastInduction, Part); - - if (isa(EntryVal)) - addMetadata(LastInduction, EntryVal); - - LastInduction = cast( - Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add")); - LastInduction->setDebugLoc(EntryVal->getDebugLoc()); - } - - // Move the last step to the end of the latch block. This ensures consistent - // placement of all induction updates. - auto *LoopVectorLatch = LI->getLoopFor(LoopVectorBody)->getLoopLatch(); - auto *Br = cast(LoopVectorLatch->getTerminator()); - LastInduction->moveBefore(Br); - LastInduction->setName("vec.ind.next"); - - VecInd->addIncoming(SteppedStart, LoopVectorPreHeader); - VecInd->addIncoming(LastInduction, LoopVectorLatch); -} - /// Compute scalar induction steps. \p ScalarIV is the scalar induction /// variable on which to base the steps, \p Step is the size of the step. 
static void buildScalarSteps(Value *ScalarIV, Value *Step, @@ -2629,83 +2529,6 @@ llvm_unreachable("invalid enum"); } -void InnerLoopVectorizer::widenIntOrFpInduction( - PHINode *IV, VPWidenIntOrFpInductionRecipe *Def, VPTransformState &State, - Value *CanonicalIV) { - Value *Start = Def->getStartValue()->getLiveInIRValue(); - const InductionDescriptor &ID = Def->getInductionDescriptor(); - TruncInst *Trunc = Def->getTruncInst(); - IRBuilderBase &Builder = State.Builder; - assert(IV->getType() == ID.getStartValue()->getType() && "Types must match"); - assert(State.VF.isVector() && "must have vector VF"); - - // The value from the original loop to which we are mapping the new induction - // variable. - Instruction *EntryVal = Trunc ? cast(Trunc) : IV; - - auto &DL = EntryVal->getModule()->getDataLayout(); - - // Generate code for the induction step. Note that induction steps are - // required to be loop-invariant - auto CreateStepValue = [&](const SCEV *Step) -> Value * { - assert(PSE.getSE()->isLoopInvariant(Step, OrigLoop) && - "Induction step should be loop invariant"); - if (PSE.getSE()->isSCEVable(IV->getType())) { - SCEVExpander Exp(*PSE.getSE(), DL, "induction"); - return Exp.expandCodeFor(Step, Step->getType(), - State.CFG.VectorPreHeader->getTerminator()); - } - return cast(Step)->getValue(); - }; - - // The scalar value to broadcast. This is derived from the canonical - // induction variable. If a truncation type is given, truncate the canonical - // induction variable and step. Otherwise, derive these values from the - // induction descriptor. - auto CreateScalarIV = [&](Value *&Step) -> Value * { - Value *ScalarIV = CanonicalIV; - Type *NeededType = IV->getType(); - if (!Def->isCanonical() || ScalarIV->getType() != NeededType) { - ScalarIV = - NeededType->isIntegerTy() - ? 
Builder.CreateSExtOrTrunc(ScalarIV, NeededType) - : Builder.CreateCast(Instruction::SIToFP, ScalarIV, NeededType); - ScalarIV = emitTransformedIndex(Builder, ScalarIV, Start, Step, ID); - ScalarIV->setName("offset.idx"); - } - if (Trunc) { - auto *TruncType = cast(Trunc->getType()); - assert(Step->getType()->isIntegerTy() && - "Truncation requires an integer step"); - ScalarIV = Builder.CreateTrunc(ScalarIV, TruncType); - Step = Builder.CreateTrunc(Step, TruncType); - } - return ScalarIV; - }; - - // Fast-math-flags propagate from the original induction instruction. - IRBuilder<>::FastMathFlagGuard FMFG(Builder); - if (ID.getInductionBinOp() && isa(ID.getInductionBinOp())) - Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags()); - - // Now do the actual transformations, and start with creating the step value. - Value *Step = CreateStepValue(ID.getStep()); - - // Create a new independent vector induction variable. Later VPlan2VPlan - // optimizations will remove it, if it won't be needed, e.g. because all users - // of it access scalar values. - createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, State); - - if (Def->needsScalarIV()) { - // Create scalar steps that can be used by instructions we will later - // scalarize. Note that the addition of the scalar steps will not increase - // the number of instructions in the loop in the common case prior to - // InstCombine. We will be trading one vector extract for each scalar step. 
- Value *ScalarIV = CreateScalarIV(Step); - buildScalarSteps(ScalarIV, Step, ID, Def, State); - } -} - void InnerLoopVectorizer::packScalarIntoVectorValue(VPValue *Def, const VPIteration &Instance, VPTransformState &State) { @@ -8473,8 +8296,8 @@ static VPWidenIntOrFpInductionRecipe * createWidenInductionRecipe(PHINode *Phi, Instruction *PhiOrTrunc, VPValue *Start, const InductionDescriptor &IndDesc, - LoopVectorizationCostModel &CM, Loop &OrigLoop, - VFRange &Range) { + LoopVectorizationCostModel &CM, ScalarEvolution &SE, + Loop &OrigLoop, VFRange &Range) { // Returns true if an instruction \p I should be scalarized instead of // vectorized for the chosen vectorization factor. auto ShouldScalarizeInstruction = [&CM](Instruction *I, ElementCount VF) { @@ -8501,13 +8324,15 @@ Range); assert(IndDesc.getStartValue() == Phi->getIncomingValueForBlock(OrigLoop.getLoopPreheader())); + assert(SE.isLoopInvariant(IndDesc.getStep(), &OrigLoop) && + "step must be loop invariant"); if (auto *TruncI = dyn_cast(PhiOrTrunc)) { - return new VPWidenIntOrFpInductionRecipe(Phi, Start, IndDesc, TruncI, - NeedsScalarIV, !NeedsScalarIVOnly); + return new VPWidenIntOrFpInductionRecipe( + Phi, Start, IndDesc, TruncI, NeedsScalarIV, !NeedsScalarIVOnly, SE); } assert(isa(PhiOrTrunc) && "must be a phi node here"); return new VPWidenIntOrFpInductionRecipe(Phi, Start, IndDesc, NeedsScalarIV, - !NeedsScalarIVOnly); + !NeedsScalarIVOnly, SE); } VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionPHI( @@ -8516,8 +8341,8 @@ // Check if this is an integer or fp induction. If so, build the recipe that // produces its scalar and vector values. 
if (auto *II = Legal->getIntOrFpInductionDescriptor(Phi)) - return createWidenInductionRecipe(Phi, Phi, Operands[0], *II, CM, *OrigLoop, - Range); + return createWidenInductionRecipe(Phi, Phi, Operands[0], *II, CM, + *PSE.getSE(), *OrigLoop, Range); return nullptr; } @@ -8545,7 +8370,8 @@ auto *Phi = cast(I->getOperand(0)); const InductionDescriptor &II = *Legal->getIntOrFpInductionDescriptor(Phi); VPValue *Start = Plan.getOrAddVPValue(II.getStartValue()); - return createWidenInductionRecipe(Phi, I, Start, II, CM, *OrigLoop, Range); + return createWidenInductionRecipe(Phi, I, Start, II, CM, *PSE.getSE(), + *OrigLoop, Range); } return nullptr; } @@ -9720,8 +9546,117 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) { assert(!State.Instance && "Int or FP induction being replicated."); - auto *CanonicalIV = State.get(getParent()->getPlan()->getCanonicalIV(), 0); - State.ILV->widenIntOrFpInduction(IV, this, State, CanonicalIV); + + Value *Start = getStartValue()->getLiveInIRValue(); + const InductionDescriptor &ID = getInductionDescriptor(); + TruncInst *Trunc = getTruncInst(); + IRBuilderBase &Builder = State.Builder; + assert(IV->getType() == ID.getStartValue()->getType() && "Types must match"); + assert(State.VF.isVector() && "must have vector VF"); + + // The value from the original loop to which we are mapping the new induction + // variable. + Instruction *EntryVal = Trunc ? cast(Trunc) : IV; + + auto &DL = EntryVal->getModule()->getDataLayout(); + + // Generate code for the induction step. Note that induction steps are + // required to be loop-invariant + auto CreateStepValue = [&](const SCEV *Step) -> Value * { + if (SE.isSCEVable(IV->getType())) { + SCEVExpander Exp(SE, DL, "induction"); + return Exp.expandCodeFor(Step, Step->getType(), + State.CFG.VectorPreHeader->getTerminator()); + } + return cast(Step)->getValue(); + }; + + // Fast-math-flags propagate from the original induction instruction. 
+ IRBuilder<>::FastMathFlagGuard FMFG(Builder); + if (ID.getInductionBinOp() && isa(ID.getInductionBinOp())) + Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags()); + + // Now do the actual transformations, and start with creating the step value. + Value *Step = CreateStepValue(ID.getStep()); + + assert((isa(EntryVal) || isa(EntryVal)) && + "Expected either an induction phi-node or a truncate of it!"); + + // Construct the initial value of the vector IV in the vector loop preheader + auto CurrIP = Builder.saveIP(); + Builder.SetInsertPoint(State.CFG.VectorPreHeader->getTerminator()); + if (isa(EntryVal)) { + assert(Start->getType()->isIntegerTy() && + "Truncation requires an integer type"); + auto *TruncType = cast(EntryVal->getType()); + Step = Builder.CreateTrunc(Step, TruncType); + Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType); + } + + Value *Zero = getSignedIntOrFpConstant(Start->getType(), 0); + Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start); + Value *SteppedStart = getStepVector( + SplatStart, Zero, Step, ID.getInductionOpcode(), State.VF, State.Builder); + + // We create vector phi nodes for both integer and floating-point induction + // variables. Here, we determine the kind of arithmetic we will perform. + Instruction::BinaryOps AddOp; + Instruction::BinaryOps MulOp; + if (Step->getType()->isIntegerTy()) { + AddOp = Instruction::Add; + MulOp = Instruction::Mul; + } else { + AddOp = ID.getInductionOpcode(); + MulOp = Instruction::FMul; + } + + // Multiply the vectorization factor by the step using integer or + // floating-point arithmetic as appropriate. + Type *StepType = Step->getType(); + Value *RuntimeVF; + if (Step->getType()->isFloatingPointTy()) + RuntimeVF = getRuntimeVFAsFloat(Builder, StepType, State.VF); + else + RuntimeVF = getRuntimeVF(Builder, StepType, State.VF); + Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF); + + // Create a vector splat to use in the induction update. 
+ // + // FIXME: If the step is non-constant, we create the vector splat with + // IRBuilder. IRBuilder can constant-fold the multiply, but it doesn't + // handle a constant vector splat. + Value *SplatVF = isa(Mul) + ? ConstantVector::getSplat(State.VF, cast(Mul)) + : Builder.CreateVectorSplat(State.VF, Mul); + Builder.restoreIP(CurrIP); + + // We may need to add the step a number of times, depending on the unroll + // factor. The last of those goes into the PHI. + PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind", + &*State.CFG.PrevBB->getFirstInsertionPt()); + VecInd->setDebugLoc(EntryVal->getDebugLoc()); + Instruction *LastInduction = VecInd; + for (unsigned Part = 0; Part < State.UF; ++Part) { + State.set(this, LastInduction, Part); + + if (isa(EntryVal)) + State.ILV->addMetadata(LastInduction, EntryVal); + + LastInduction = cast( + Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add")); + LastInduction->setDebugLoc(EntryVal->getDebugLoc()); + } + + // Move the last step to the end of the latch block. This ensures consistent + // placement of all induction updates. + auto *LoopVectorLatch = + State.LI->getLoopFor(State.CFG.PrevBB)->getLoopLatch(); + auto *Br = cast(LoopVectorLatch->getTerminator()); + LastInduction->moveBefore(Br); + LastInduction->setName("vec.ind.next"); + + VecInd->addIncoming(SteppedStart, State.CFG.VectorPreHeader); + VecInd->addIncoming(LastInduction, LoopVectorLatch); } void VPScalarIVStepsRecipe::execute(VPTransformState &State) { diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1062,28 +1062,34 @@ }; /// A recipe for handling phi nodes of integer and floating-point inductions, -/// producing their vector and scalar values. +/// producing their vector values. 
class VPWidenIntOrFpInductionRecipe : public VPRecipeBase, public VPValue { PHINode *IV; const InductionDescriptor &IndDesc; bool NeedsScalarIV; bool NeedsVectorIV; + /// SCEV used to expand step. + /// FIXME: move expansion of step to the pre-header, once it is modeled + /// explicitly. + ScalarEvolution &SE; + public: VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, const InductionDescriptor &IndDesc, - bool NeedsScalarIV, bool NeedsVectorIV) + bool NeedsScalarIV, bool NeedsVectorIV, + ScalarEvolution &SE) : VPRecipeBase(VPWidenIntOrFpInductionSC, {Start}), VPValue(IV, this), IV(IV), IndDesc(IndDesc), NeedsScalarIV(NeedsScalarIV), - NeedsVectorIV(NeedsVectorIV) {} + NeedsVectorIV(NeedsVectorIV), SE(SE) {} VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, const InductionDescriptor &IndDesc, TruncInst *Trunc, bool NeedsScalarIV, - bool NeedsVectorIV) + bool NeedsVectorIV, ScalarEvolution &SE) : VPRecipeBase(VPWidenIntOrFpInductionSC, {Start}), VPValue(Trunc, this), IV(IV), IndDesc(IndDesc), NeedsScalarIV(NeedsScalarIV), - NeedsVectorIV(NeedsVectorIV) {} + NeedsVectorIV(NeedsVectorIV), SE(SE) {} ~VPWidenIntOrFpInductionRecipe() override = default; diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -55,8 +55,10 @@ /// removed. static void removeDeadRecipes(VPlan &Plan, Loop &OrigLoop); - // If all users of a vector IV need scalar values, provide them by building - // scalar steps off of the canonical scalar IV, and remove the vector IV. + /// If any user of a VPWidenIntOrFpInductionRecipe needs scalar values, + /// provide them by building scalar steps off of the canonical scalar IV and + /// update the original IV's users. This is an optional optimization to reduce + /// the need for vector extracts. 
static void optimizeInductions(VPlan &Plan, ScalarEvolution &SE); }; diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -47,8 +47,8 @@ auto *Phi = cast(VPPhi->getUnderlyingValue()); if (const auto *II = GetIntOrFpInductionDescriptor(Phi)) { VPValue *Start = Plan->getOrAddVPValue(II->getStartValue()); - NewRecipe = - new VPWidenIntOrFpInductionRecipe(Phi, Start, *II, false, true); + NewRecipe = new VPWidenIntOrFpInductionRecipe(Phi, Start, *II, false, + true, SE); } else { Plan->addVPValue(Phi, VPPhi); continue; @@ -402,7 +402,7 @@ VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock(); for (VPRecipeBase &Phi : HeaderVPBB->phis()) { auto *IV = dyn_cast(&Phi); - if (!IV || IV->needsVectorIV()) + if (!IV || !IV->needsScalarIV()) continue; const InductionDescriptor &ID = IV->getInductionDescriptor(); @@ -430,6 +430,25 @@ HeaderVPBB->insert(cast(Step->getDef()), HeaderVPBB->getFirstNonPhi()); } - IV->replaceAllUsesWith(Steps); + + // If there are no vector users of IV, simply update all users to use Steps + // instead. + if (!IV->needsVectorIV()) { + IV->replaceAllUsesWith(Steps); + continue; + } + + // Otherwise only update scalar users of IV to use Steps instead. 
+ SmallVector Users(IV->user_begin(), IV->user_end()); + for (VPUser *U : Users) { + VPRecipeBase *R = cast(U); + if (!R->usesScalars(IV)) + continue; + for (unsigned I = 0, E = R->getNumOperands(); I != E; I++) { + if (R->getOperand(I) != IV) + continue; + R->setOperand(I, Steps); + } + } } } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll @@ -303,39 +303,37 @@ ; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 3 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 2 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP6]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement poison, i64 [[INDEX]], i64 0 -; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector [[DOTSPLATINSERT2]], poison, zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = add [[DOTSPLAT3]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 2 -; CHECK-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement poison, i64 [[TMP7]], i64 0 -; CHECK-NEXT: [[DOTSPLAT5:%.*]] = shufflevector [[DOTSPLATINSERT4]], poison, zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = add [[DOTSPLAT5]], [[TMP4]] -; CHECK-NEXT: [[TMP9:%.*]] = 
add [[DOTSPLAT3]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = shl [[TMP5]], shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP11:%.*]] = shl [[TMP9]], shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, float* [[B:%.*]], [[TMP10]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, float* [[B]], [[TMP11]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4f32.nxv4p0f32( [[TMP12]], i32 4, shufflevector ( insertelement ( poison, i1 true, i32 0), poison, zeroinitializer), undef) -; CHECK-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call @llvm.masked.gather.nxv4f32.nxv4p0f32( [[TMP13]], i32 4, shufflevector ( insertelement ( poison, i1 true, i32 0), poison, zeroinitializer), undef) -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP15:%.*]] = bitcast float* [[TMP14]] to * -; CHECK-NEXT: store [[WIDE_MASKED_GATHER]], * [[TMP15]], align 4 -; CHECK-NEXT: [[TMP16:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i32 [[TMP16]], 2 -; CHECK-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = bitcast float* [[TMP19]] to * -; CHECK-NEXT: store [[WIDE_MASKED_GATHER6]], * [[TMP20]], align 4 -; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP22:%.*]] = shl nuw nsw i64 [[TMP21]], 3 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP22]] -; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP4]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = add [[VEC_IND]], [[DOTSPLAT]] 
+; CHECK-NEXT: [[TMP7:%.*]] = shl [[VEC_IND]], shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP8:%.*]] = shl [[STEP_ADD]], shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[B:%.*]], [[TMP7]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, float* [[B]], [[TMP8]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4f32.nxv4p0f32( [[TMP9]], i32 4, shufflevector ( insertelement ( poison, i1 true, i32 0), poison, zeroinitializer), undef) +; CHECK-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call @llvm.masked.gather.nxv4f32.nxv4p0f32( [[TMP10]], i32 4, shufflevector ( insertelement ( poison, i1 true, i32 0), poison, zeroinitializer), undef) +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP12:%.*]] = bitcast float* [[TMP11]] to * +; CHECK-NEXT: store [[WIDE_MASKED_GATHER]], * [[TMP12]], align 4 +; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP14:%.*]] = shl nuw nsw i32 [[TMP13]], 2 +; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, float* [[TMP11]], i64 [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = bitcast float* [[TMP16]] to * +; CHECK-NEXT: store [[WIDE_MASKED_GATHER2]], * [[TMP17]], align 4 +; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP19:%.*]] = shl nuw nsw i64 [[TMP18]], 3 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[STEP_ADD]], [[DOTSPLAT]] +; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: br i1 [[CMP_N]], label 
[[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] @@ -346,9 +344,9 @@ ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[INDVARS_IV_STRIDE2:%.*]] = shl i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV_STRIDE2]] -; CHECK-NEXT: [[TMP24:%.*]] = load float, float* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store float [[TMP24]], float* [[ARRAYIDX2]], align 4 +; CHECK-NEXT: store float [[TMP21]], float* [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll @@ -22,25 +22,28 @@ ; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP5]], 2 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP6]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: 
[[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement poison, i64 [[INDEX]], i64 0 -; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector [[DOTSPLATINSERT1]], poison, zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = add [[DOTSPLAT2]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = trunc [[TMP5]] to -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to * -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0nxv4i32(* [[TMP8]], i32 4, [[TMP6]], poison) -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to * -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32( [[WIDE_MASKED_LOAD]], * [[TMP10]], i32 4, [[TMP6]]) -; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP12:%.*]] = shl i64 [[TMP11]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]] -; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP4]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP7:%.*]] = trunc [[VEC_IND]] to +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to * +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0nxv4i32(* [[TMP9]], i32 4, [[TMP7]], poison) +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to * +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32( [[WIDE_MASKED_LOAD]], * [[TMP11]], i32 4, [[TMP7]]) +; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP13:%.*]] = shl i64 [[TMP12]], 2 +; 
CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -54,9 +57,9 @@ ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]] ; CHECK: if.then: ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[I_08]] -; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I_08]] -; CHECK-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX1]], align 4 +; CHECK-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX1]], align 4 ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll @@ -216,9 +216,6 @@ ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]]) ; CHECK-NEXT: [[TMP4:%.*]] = 
mul nuw nsw <4 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw <4 x i32> [[TMP4]], @@ -284,9 +281,6 @@ ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]]) ; CHECK-NEXT: [[TMP4:%.*]] = mul nuw nsw <4 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw <4 x i32> [[TMP4]], @@ -423,9 +417,6 @@ ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 2 -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 3 ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw nsw <4 x i32> [[VEC_IND]], [[VEC_IND2]] ; CHECK-NEXT: [[TMP6:%.*]] = add nuw nsw <4 x i32> [[TMP5]], ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], <4 x i32> [[TMP6]] @@ -548,9 +539,6 @@ ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND4:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 2 -; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 3 ; CHECK-NEXT: [[TMP7:%.*]] = mul nuw nsw <4 x i32> [[VEC_IND]], [[VEC_IND4]] ; CHECK-NEXT: 
[[TMP8:%.*]] = add nuw nsw <4 x i32> [[TMP7]], ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], <4 x i32> [[TMP8]] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll --- a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll @@ -557,25 +557,22 @@ ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[COND:%.*]], i32 [[TMP0]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i32*> [[BROADCAST_SPLAT]], <4 x i32*> [[BROADCAST_SPLAT2]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, <4 x i32*> [[TMP8]], <4 x i32> [[VEC_IND]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP9]], i32 4, <4 x i1> , <4 x i32> undef) -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP0]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 0 -; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[WIDE_MASKED_GATHER]], <4 x i32>* [[TMP12]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = 
getelementptr inbounds i32, i32* [[COND:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x i32*> [[BROADCAST_SPLAT]], <4 x i32*> [[BROADCAST_SPLAT2]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, <4 x i32*> [[TMP5]], <4 x i32> [[VEC_IND]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP6]], i32 4, <4 x i1> , <4 x i32> undef) +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[WIDE_MASKED_GATHER]], <4 x i32>* [[TMP9]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], -; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] @@ -589,8 +586,8 @@ ; CHECK: for.body: ; CHECK-NEXT: [[I_011:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[COND]], i32 [[I_011]] -; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX]], 
align 4 -; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP14]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP11]], 0 ; CHECK-NEXT: [[C_B:%.*]] = select i1 [[TOBOOL_NOT]], i32* [[C]], i32* [[B]] ; CHECK-NEXT: [[COND_IN:%.*]] = getelementptr inbounds i32, i32* [[C_B]], i32 [[I_011]] ; CHECK-NEXT: [[COND:%.*]] = load i32, i32* [[COND_IN]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll --- a/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll @@ -21,7 +21,6 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <32 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <32 x i8> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX]] ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0 @@ -30,7 +29,6 @@ ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to <32 x i8>* ; CHECK-NEXT: store <32 x i8> [[VEC_IND1]], <32 x i8>* [[TMP8]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <32 x i64> [[VEC_IND]], ; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <32 x i8> [[VEC_IND1]], ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -94,13 +92,9 @@ ; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <2 x i64> [[STEP_ADD1]], ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 9, [[INDEX]] ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 1 ; CHECK-NEXT: [[TMP7:%.*]] = add 
i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 3 ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 4 -; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 5 ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], 6 -; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 7 ; CHECK-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[STEP_ADD]], ; CHECK-NEXT: [[TMP15:%.*]] = add nsw <2 x i64> [[STEP_ADD1]], diff --git a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll --- a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll @@ -175,23 +175,6 @@ ; AVX512: vector.body: ; AVX512-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; AVX512-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; AVX512-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX1]], 16 -; AVX512-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; AVX512-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16 -; AVX512-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 32 -; AVX512-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 48 -; AVX512-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 64 -; AVX512-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 80 -; AVX512-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 96 -; AVX512-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 112 -; AVX512-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 128 -; AVX512-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 144 -; AVX512-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 160 -; AVX512-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], 176 -; AVX512-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 192 -; AVX512-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 208 -; AVX512-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 224 -; AVX512-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 240 
; AVX512-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER:%.*]], <16 x i64> [[VEC_IND]] ; AVX512-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> [[TMP16]], i32 4, <16 x i1> , <16 x i32> undef) ; AVX512-NEXT: [[TMP17:%.*]] = icmp sgt <16 x i32> [[WIDE_MASKED_GATHER]], zeroinitializer @@ -339,23 +322,6 @@ ; AVX512: vector.body: ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; AVX512-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; AVX512-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 16 -; AVX512-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; AVX512-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16 -; AVX512-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 32 -; AVX512-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 48 -; AVX512-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 64 -; AVX512-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 80 -; AVX512-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 96 -; AVX512-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 112 -; AVX512-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 128 -; AVX512-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 144 -; AVX512-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 160 -; AVX512-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], 176 -; AVX512-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 192 -; AVX512-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 208 -; AVX512-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 224 -; AVX512-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 240 ; AVX512-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER:%.*]], <16 x i64> [[VEC_IND]] ; AVX512-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> [[TMP16]], i32 4, <16 x i1> , <16 x i32> undef) ; AVX512-NEXT: [[TMP17:%.*]] = icmp sgt <16 x i32> [[WIDE_MASKED_GATHER]], zeroinitializer @@ -490,23 +456,6 @@ ; AVX512: vector.body: 
; AVX512-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; AVX512-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; AVX512-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX1]], 16 -; AVX512-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; AVX512-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16 -; AVX512-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 32 -; AVX512-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 48 -; AVX512-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 64 -; AVX512-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 80 -; AVX512-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 96 -; AVX512-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 112 -; AVX512-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 128 -; AVX512-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 144 -; AVX512-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 160 -; AVX512-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], 176 -; AVX512-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 192 -; AVX512-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 208 -; AVX512-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 224 -; AVX512-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 240 ; AVX512-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER:%.*]], <16 x i64> [[VEC_IND]] ; AVX512-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> [[TMP16]], i32 4, <16 x i1> , <16 x i32> undef) ; AVX512-NEXT: [[TMP17:%.*]] = icmp sgt <16 x i32> [[WIDE_MASKED_GATHER]], zeroinitializer @@ -640,23 +589,6 @@ ; AVX512: vector.body: ; AVX512-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; AVX512-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; AVX512-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX1]], 16 -; AVX512-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; AVX512-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16 -; 
AVX512-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 32 -; AVX512-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 48 -; AVX512-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 64 -; AVX512-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 80 -; AVX512-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 96 -; AVX512-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 112 -; AVX512-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 128 -; AVX512-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 144 -; AVX512-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 160 -; AVX512-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], 176 -; AVX512-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 192 -; AVX512-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 208 -; AVX512-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 224 -; AVX512-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 240 ; AVX512-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER:%.*]], <16 x i64> [[VEC_IND]] ; AVX512-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> [[TMP16]], i32 4, <16 x i1> , <16 x i32> undef) ; AVX512-NEXT: [[TMP17:%.*]] = icmp sgt <16 x i32> [[WIDE_MASKED_GATHER]], zeroinitializer @@ -790,23 +722,6 @@ ; AVX512: vector.body: ; AVX512-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; AVX512-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; AVX512-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX1]], 16 -; AVX512-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; AVX512-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16 -; AVX512-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 32 -; AVX512-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 48 -; AVX512-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 64 -; AVX512-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 80 -; AVX512-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 96 -; AVX512-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 112 -; AVX512-NEXT: [[TMP8:%.*]] = add i64 
[[OFFSET_IDX]], 128 -; AVX512-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 144 -; AVX512-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 160 -; AVX512-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], 176 -; AVX512-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 192 -; AVX512-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 208 -; AVX512-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 224 -; AVX512-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 240 ; AVX512-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER:%.*]], <16 x i64> [[VEC_IND]] ; AVX512-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> [[TMP16]], i32 4, <16 x i1> , <16 x i32> undef) ; AVX512-NEXT: [[TMP17:%.*]] = icmp sgt <16 x i32> [[WIDE_MASKED_GATHER]], zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll --- a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll @@ -34,75 +34,63 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP36:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP37:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP38:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP39:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP25:%.*]], 
[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP27:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], ; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <4 x i64> [[STEP_ADD]], ; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <4 x i64> [[STEP_ADD1]], ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5 -; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6 -; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7 -; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 8 -; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 9 -; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 10 -; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 11 -; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 12 -; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13 -; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14 -; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15 -; CHECK-NEXT: [[TMP16:%.*]] = icmp slt <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP17:%.*]] = icmp slt <4 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT8]] -; CHECK-NEXT: [[TMP18:%.*]] = icmp slt <4 x i64> [[STEP_ADD1]], [[BROADCAST_SPLAT10]] -; CHECK-NEXT: [[TMP19:%.*]] = icmp slt <4 x i64> [[STEP_ADD2]], [[BROADCAST_SPLAT12]] -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP8]] -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP12]] -; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i32, i32* [[TMP20]], i32 0 -; 
CHECK-NEXT: [[TMP25:%.*]] = bitcast i32* [[TMP24]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP25]], align 4 -; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i32, i32* [[TMP20]], i32 4 -; CHECK-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP27]], align 4 -; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i32, i32* [[TMP20]], i32 8 -; CHECK-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP29]], align 4 -; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i32, i32* [[TMP20]], i32 12 -; CHECK-NEXT: [[TMP31:%.*]] = bitcast i32* [[TMP30]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP31]], align 4 -; CHECK-NEXT: [[TMP32:%.*]] = xor <4 x i1> [[TMP16]], -; CHECK-NEXT: [[TMP33:%.*]] = xor <4 x i1> [[TMP17]], -; CHECK-NEXT: [[TMP34:%.*]] = xor <4 x i1> [[TMP18]], -; CHECK-NEXT: [[TMP35:%.*]] = xor <4 x i1> [[TMP19]], -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP16]], <4 x i32> [[WIDE_LOAD]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[PREDPHI16:%.*]] = select <4 x i1> [[TMP17]], <4 x i32> [[WIDE_LOAD13]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[PREDPHI17:%.*]] = select <4 x i1> [[TMP18]], <4 x i32> [[WIDE_LOAD14]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[PREDPHI18:%.*]] = select <4 x i1> [[TMP19]], <4 x i32> [[WIDE_LOAD15]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP36]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]] -; CHECK-NEXT: [[TMP37]] = add <4 x i32> [[VEC_PHI4]], [[PREDPHI16]] -; CHECK-NEXT: [[TMP38]] = add <4 x i32> [[VEC_PHI5]], [[PREDPHI17]] -; CHECK-NEXT: [[TMP39]] = add <4 x i32> [[VEC_PHI6]], [[PREDPHI18]] +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 12 +; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <4 x i64> [[VEC_IND]], 
[[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp slt <4 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT8]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp slt <4 x i64> [[STEP_ADD1]], [[BROADCAST_SPLAT10]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp slt <4 x i64> [[STEP_ADD2]], [[BROADCAST_SPLAT12]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP13]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, i32* [[TMP8]], i32 4 +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP15]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, i32* [[TMP8]], i32 8 +; CHECK-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP17]], align 4 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i32, i32* [[TMP8]], i32 12 +; CHECK-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4 +; CHECK-NEXT: [[TMP20:%.*]] = xor <4 x i1> [[TMP4]], +; CHECK-NEXT: [[TMP21:%.*]] = xor <4 x i1> [[TMP5]], +; CHECK-NEXT: [[TMP22:%.*]] = xor <4 x i1> [[TMP6]], +; CHECK-NEXT: [[TMP23:%.*]] = xor <4 x i1> [[TMP7]], +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> [[WIDE_LOAD]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[PREDPHI16:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[WIDE_LOAD13]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[PREDPHI17:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> 
[[WIDE_LOAD14]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[PREDPHI18:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> [[WIDE_LOAD15]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP24]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]] +; CHECK-NEXT: [[TMP25]] = add <4 x i32> [[VEC_PHI4]], [[PREDPHI16]] +; CHECK-NEXT: [[TMP26]] = add <4 x i32> [[VEC_PHI5]], [[PREDPHI17]] +; CHECK-NEXT: [[TMP27]] = add <4 x i32> [[VEC_PHI6]], [[PREDPHI18]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD2]], -; CHECK-NEXT: [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 -; CHECK-NEXT: br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 +; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP37]], [[TMP36]] -; CHECK-NEXT: [[BIN_RDX19:%.*]] = add <4 x i32> [[TMP38]], [[BIN_RDX]] -; CHECK-NEXT: [[BIN_RDX20:%.*]] = add <4 x i32> [[TMP39]], [[BIN_RDX19]] -; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX20]]) +; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP25]], [[TMP24]] +; CHECK-NEXT: [[BIN_RDX19:%.*]] = add <4 x i32> [[TMP26]], [[BIN_RDX]] +; CHECK-NEXT: [[BIN_RDX20:%.*]] = add <4 x i32> [[TMP27]], [[BIN_RDX19]] +; CHECK-NEXT: [[TMP29:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX20]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 4096, 4096 ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP41]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP29]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br 
label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] @@ -120,7 +108,7 @@ ; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094 ; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: loop_exit: -; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP41]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP29]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/X86/optsize.ll b/llvm/test/Transforms/LoopVectorize/X86/optsize.ll --- a/llvm/test/Transforms/LoopVectorize/X86/optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/optsize.ll @@ -231,69 +231,6 @@ ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <64 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3 -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 5 -; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 6 -; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 7 -; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[INDEX]], 8 -; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[INDEX]], 9 -; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[INDEX]], 10 -; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[INDEX]], 11 -; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[INDEX]], 12 -; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[INDEX]], 13 -; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[INDEX]], 14 -; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[INDEX]], 15 -; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[INDEX]], 16 -; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[INDEX]], 
17 -; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[INDEX]], 18 -; CHECK-NEXT: [[TMP19:%.*]] = add i32 [[INDEX]], 19 -; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[INDEX]], 20 -; CHECK-NEXT: [[TMP21:%.*]] = add i32 [[INDEX]], 21 -; CHECK-NEXT: [[TMP22:%.*]] = add i32 [[INDEX]], 22 -; CHECK-NEXT: [[TMP23:%.*]] = add i32 [[INDEX]], 23 -; CHECK-NEXT: [[TMP24:%.*]] = add i32 [[INDEX]], 24 -; CHECK-NEXT: [[TMP25:%.*]] = add i32 [[INDEX]], 25 -; CHECK-NEXT: [[TMP26:%.*]] = add i32 [[INDEX]], 26 -; CHECK-NEXT: [[TMP27:%.*]] = add i32 [[INDEX]], 27 -; CHECK-NEXT: [[TMP28:%.*]] = add i32 [[INDEX]], 28 -; CHECK-NEXT: [[TMP29:%.*]] = add i32 [[INDEX]], 29 -; CHECK-NEXT: [[TMP30:%.*]] = add i32 [[INDEX]], 30 -; CHECK-NEXT: [[TMP31:%.*]] = add i32 [[INDEX]], 31 -; CHECK-NEXT: [[TMP32:%.*]] = add i32 [[INDEX]], 32 -; CHECK-NEXT: [[TMP33:%.*]] = add i32 [[INDEX]], 33 -; CHECK-NEXT: [[TMP34:%.*]] = add i32 [[INDEX]], 34 -; CHECK-NEXT: [[TMP35:%.*]] = add i32 [[INDEX]], 35 -; CHECK-NEXT: [[TMP36:%.*]] = add i32 [[INDEX]], 36 -; CHECK-NEXT: [[TMP37:%.*]] = add i32 [[INDEX]], 37 -; CHECK-NEXT: [[TMP38:%.*]] = add i32 [[INDEX]], 38 -; CHECK-NEXT: [[TMP39:%.*]] = add i32 [[INDEX]], 39 -; CHECK-NEXT: [[TMP40:%.*]] = add i32 [[INDEX]], 40 -; CHECK-NEXT: [[TMP41:%.*]] = add i32 [[INDEX]], 41 -; CHECK-NEXT: [[TMP42:%.*]] = add i32 [[INDEX]], 42 -; CHECK-NEXT: [[TMP43:%.*]] = add i32 [[INDEX]], 43 -; CHECK-NEXT: [[TMP44:%.*]] = add i32 [[INDEX]], 44 -; CHECK-NEXT: [[TMP45:%.*]] = add i32 [[INDEX]], 45 -; CHECK-NEXT: [[TMP46:%.*]] = add i32 [[INDEX]], 46 -; CHECK-NEXT: [[TMP47:%.*]] = add i32 [[INDEX]], 47 -; CHECK-NEXT: [[TMP48:%.*]] = add i32 [[INDEX]], 48 -; CHECK-NEXT: [[TMP49:%.*]] = add i32 [[INDEX]], 49 -; CHECK-NEXT: [[TMP50:%.*]] = add i32 [[INDEX]], 50 -; CHECK-NEXT: [[TMP51:%.*]] = add i32 [[INDEX]], 51 -; CHECK-NEXT: [[TMP52:%.*]] = add i32 [[INDEX]], 52 -; CHECK-NEXT: [[TMP53:%.*]] = add i32 [[INDEX]], 53 -; CHECK-NEXT: [[TMP54:%.*]] = add i32 [[INDEX]], 54 -; CHECK-NEXT: [[TMP55:%.*]] = add 
i32 [[INDEX]], 55 -; CHECK-NEXT: [[TMP56:%.*]] = add i32 [[INDEX]], 56 -; CHECK-NEXT: [[TMP57:%.*]] = add i32 [[INDEX]], 57 -; CHECK-NEXT: [[TMP58:%.*]] = add i32 [[INDEX]], 58 -; CHECK-NEXT: [[TMP59:%.*]] = add i32 [[INDEX]], 59 -; CHECK-NEXT: [[TMP60:%.*]] = add i32 [[INDEX]], 60 -; CHECK-NEXT: [[TMP61:%.*]] = add i32 [[INDEX]], 61 -; CHECK-NEXT: [[TMP62:%.*]] = add i32 [[INDEX]], 62 -; CHECK-NEXT: [[TMP63:%.*]] = add i32 [[INDEX]], 63 ; CHECK-NEXT: [[TMP64:%.*]] = mul nsw <64 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], <64 x i32> [[TMP64]] ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <64 x i32> @llvm.masked.gather.v64i32.v64p0i32(<64 x i32*> [[TMP65]], i32 4, <64 x i1> , <64 x i32> undef) @@ -335,13 +272,6 @@ ; AUTOVF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; AUTOVF-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; AUTOVF-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 -; AUTOVF-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 -; AUTOVF-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2 -; AUTOVF-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3 -; AUTOVF-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 4 -; AUTOVF-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 5 -; AUTOVF-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 6 -; AUTOVF-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 7 ; AUTOVF-NEXT: [[TMP8:%.*]] = mul nsw <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; AUTOVF-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], <8 x i32> [[TMP8]] ; AUTOVF-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> [[TMP9]], i32 4, <8 x i1> , <8 x i32> undef) diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll b/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll --- a/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll @@ -7,7 +7,6 @@ ; CHECK: 
vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND_TRUNC:%.+]] = phi <4 x i32> [ , %vector.ph ], [ [[VEC_IND_TRUNC_NEXT:%.+]], %vector.body ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 2, [[INDEX]] ; CHECK-NEXT: [[TRUNC:%.+]] = trunc i64 [[OFFSET_IDX]] to i32 ; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TRUNC]], 0 @@ -22,7 +21,6 @@ ; CHECK: store <4 x i64> [[VEC_IND]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[VEC_IND_TRUNC_NEXT]] = add <4 x i32> [[VEC_IND_TRUNC]], ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 80 ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]] ; diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll @@ -719,7 +719,6 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ , [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 @@ -731,7 +730,6 @@ ; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP6]] to <4 x i16>* ; CHECK-NEXT: store <4 x i16> [[TMP8]], <4 x i16>* [[TMP9]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], ; CHECK-NEXT: 
[[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: @@ -800,7 +798,6 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ , [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 @@ -812,7 +809,6 @@ ; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP6]] to <4 x i16>* ; CHECK-NEXT: store <4 x i16> [[TMP8]], <4 x i16>* [[TMP9]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -18,6 +18,7 @@ ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%0> = phi ir<0>, ir<%conv> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<0>, ir<1> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: Successor(s): loop.0 ; CHECK-EMPTY: @@ -31,7 +32,7 @@ ; CHECK-NEXT: CondBit: vp<[[MASK]]> (loop) ;
CHECK-EMPTY: ; CHECK-NEXT: pred.load.if: -; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, ir<%iv> +; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE ir<%lv> = load ir<%gep> (S->V) ; CHECK-NEXT: Successor(s): pred.load.continue ; CHECK-EMPTY: @@ -102,6 +103,7 @@ ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<0>, ir<1> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: Successor(s): loop.0 ; CHECK-EMPTY: @@ -122,7 +124,7 @@ ; CHECK-NEXT: pred.store.if: ; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x> ; CHECK-NEXT: REPLICATE ir<%add> = add ir<%rem>, ir<%recur.next> -; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, ir<%iv> +; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE store ir<%add>, ir<%gep> ; CHECK-NEXT: Successor(s): pred.store.continue ; CHECK-EMPTY: @@ -241,8 +243,9 @@ ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%0> = phi ir<0>, ir<%conv> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<0>, ir<1> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> -; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, ir<%iv> +; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, vp<[[STEPS]]> ; CHECK-NEXT: Successor(s): loop.0 ; CHECK-EMPTY: ; CHECK-NEXT: loop.0: @@ -335,6 +338,7 @@ ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<0>, ir<1> ; 
CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: Successor(s): loop.0 ; CHECK-EMPTY: @@ -373,7 +377,7 @@ ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.if: ; CHECK-NEXT: REPLICATE ir<%rem.div> = sdiv ir<20>, vp<[[PRED]]> -; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, ir<%iv> +; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE store ir<%rem.div>, ir<%gep> ; CHECK-NEXT: Successor(s): pred.store.continue ; CHECK-EMPTY: diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -2132,14 +2132,6 @@ ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], -; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 -; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 -; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2 -; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3 -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 5 -; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 6 -; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 7 ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = add <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; UNROLL-NO-IC-NEXT: [[TMP11]] = add <4 x i32> [[STEP_ADD]], [[BROADCAST_SPLAT3]] ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP10]], <4 x i32> @@ -2214,10 +2206,6 @@ ; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[VEC_IND:%.*]] = 
phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 -; SINK-AFTER-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 -; SINK-AFTER-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2 -; SINK-AFTER-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3 ; SINK-AFTER-NEXT: [[TMP5]] = add <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; SINK-AFTER-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP5]], <4 x i32> ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 @@ -4486,66 +4474,66 @@ ; UNROLL-NEXT: [[TMP7:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_UDIV_IF]] ] ; UNROLL-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1 ; UNROLL-NEXT: br i1 [[TMP8]], label [[PRED_UDIV_IF6:%.*]], label [[PRED_UDIV_CONTINUE7:%.*]] -; UNROLL: pred.udiv.if6: +; UNROLL: pred.udiv.if5: ; UNROLL-NEXT: [[TMP9:%.*]] = add i32 [[OFFSET_IDX]], -1 ; UNROLL-NEXT: [[TMP10:%.*]] = udiv i32 219220132, [[TMP9]] ; UNROLL-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP10]], i64 1 ; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE7]] -; UNROLL: pred.udiv.continue7: +; UNROLL: pred.udiv.continue6: ; UNROLL-NEXT: [[TMP12:%.*]] = phi <4 x i32> [ [[TMP7]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP11]], [[PRED_UDIV_IF6]] ] ; UNROLL-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP2]], i64 2 ; UNROLL-NEXT: br i1 [[TMP13]], label [[PRED_UDIV_IF8:%.*]], label [[PRED_UDIV_CONTINUE9:%.*]] -; UNROLL: pred.udiv.if8: +; UNROLL: pred.udiv.if7: ; UNROLL-NEXT: [[TMP14:%.*]] = add i32 [[OFFSET_IDX]], -2 ; UNROLL-NEXT: [[TMP15:%.*]] = udiv i32 219220132, [[TMP14]] ; UNROLL-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP15]], i64 2 ; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE9]] -; UNROLL: pred.udiv.continue9: +; UNROLL: pred.udiv.continue8: ; UNROLL-NEXT: [[TMP17:%.*]] = 
phi <4 x i32> [ [[TMP12]], [[PRED_UDIV_CONTINUE7]] ], [ [[TMP16]], [[PRED_UDIV_IF8]] ] ; UNROLL-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP2]], i64 3 ; UNROLL-NEXT: br i1 [[TMP18]], label [[PRED_UDIV_IF10:%.*]], label [[PRED_UDIV_CONTINUE11:%.*]] -; UNROLL: pred.udiv.if10: +; UNROLL: pred.udiv.if9: ; UNROLL-NEXT: [[TMP19:%.*]] = add i32 [[OFFSET_IDX]], -3 ; UNROLL-NEXT: [[TMP20:%.*]] = udiv i32 219220132, [[TMP19]] ; UNROLL-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP20]], i64 3 ; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE11]] -; UNROLL: pred.udiv.continue11: +; UNROLL: pred.udiv.continue10: ; UNROLL-NEXT: [[TMP22:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_UDIV_CONTINUE9]] ], [ [[TMP21]], [[PRED_UDIV_IF10]] ] ; UNROLL-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[TMP3]], i64 0 ; UNROLL-NEXT: br i1 [[TMP23]], label [[PRED_UDIV_IF12:%.*]], label [[PRED_UDIV_CONTINUE13:%.*]] -; UNROLL: pred.udiv.if12: +; UNROLL: pred.udiv.if11: ; UNROLL-NEXT: [[TMP24:%.*]] = add i32 [[OFFSET_IDX]], -4 ; UNROLL-NEXT: [[TMP25:%.*]] = udiv i32 219220132, [[TMP24]] ; UNROLL-NEXT: [[TMP26:%.*]] = insertelement <4 x i32> poison, i32 [[TMP25]], i64 0 ; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE13]] -; UNROLL: pred.udiv.continue13: +; UNROLL: pred.udiv.continue12: ; UNROLL-NEXT: [[TMP27:%.*]] = phi <4 x i32> [ poison, [[PRED_UDIV_CONTINUE11]] ], [ [[TMP26]], [[PRED_UDIV_IF12]] ] ; UNROLL-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP3]], i64 1 ; UNROLL-NEXT: br i1 [[TMP28]], label [[PRED_UDIV_IF14:%.*]], label [[PRED_UDIV_CONTINUE15:%.*]] -; UNROLL: pred.udiv.if14: +; UNROLL: pred.udiv.if13: ; UNROLL-NEXT: [[TMP29:%.*]] = add i32 [[OFFSET_IDX]], -5 ; UNROLL-NEXT: [[TMP30:%.*]] = udiv i32 219220132, [[TMP29]] ; UNROLL-NEXT: [[TMP31:%.*]] = insertelement <4 x i32> [[TMP27]], i32 [[TMP30]], i64 1 ; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE15]] -; UNROLL: pred.udiv.continue15: +; UNROLL: pred.udiv.continue14: ; UNROLL-NEXT: [[TMP32:%.*]] = phi <4 x i32> [ 
[[TMP27]], [[PRED_UDIV_CONTINUE13]] ], [ [[TMP31]], [[PRED_UDIV_IF14]] ] ; UNROLL-NEXT: [[TMP33:%.*]] = extractelement <4 x i1> [[TMP3]], i64 2 ; UNROLL-NEXT: br i1 [[TMP33]], label [[PRED_UDIV_IF16:%.*]], label [[PRED_UDIV_CONTINUE17:%.*]] -; UNROLL: pred.udiv.if16: +; UNROLL: pred.udiv.if15: ; UNROLL-NEXT: [[TMP34:%.*]] = add i32 [[OFFSET_IDX]], -6 ; UNROLL-NEXT: [[TMP35:%.*]] = udiv i32 219220132, [[TMP34]] ; UNROLL-NEXT: [[TMP36:%.*]] = insertelement <4 x i32> [[TMP32]], i32 [[TMP35]], i64 2 ; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE17]] -; UNROLL: pred.udiv.continue17: +; UNROLL: pred.udiv.continue16: ; UNROLL-NEXT: [[TMP37:%.*]] = phi <4 x i32> [ [[TMP32]], [[PRED_UDIV_CONTINUE15]] ], [ [[TMP36]], [[PRED_UDIV_IF16]] ] ; UNROLL-NEXT: [[TMP38:%.*]] = extractelement <4 x i1> [[TMP3]], i64 3 ; UNROLL-NEXT: br i1 [[TMP38]], label [[PRED_UDIV_IF18:%.*]], label [[PRED_UDIV_CONTINUE19]] -; UNROLL: pred.udiv.if18: +; UNROLL: pred.udiv.if17: ; UNROLL-NEXT: [[TMP39:%.*]] = add i32 [[OFFSET_IDX]], -7 ; UNROLL-NEXT: [[TMP40:%.*]] = udiv i32 219220132, [[TMP39]] ; UNROLL-NEXT: [[TMP41:%.*]] = insertelement <4 x i32> [[TMP37]], i32 [[TMP40]], i64 3 ; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE19]] -; UNROLL: pred.udiv.continue19: +; UNROLL: pred.udiv.continue18: ; UNROLL-NEXT: [[TMP42]] = phi <4 x i32> [ [[TMP37]], [[PRED_UDIV_CONTINUE17]] ], [ [[TMP41]], [[PRED_UDIV_IF18]] ] ; UNROLL-NEXT: [[TMP43:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP22]], <4 x i32> ; UNROLL-NEXT: [[TMP44:%.*]] = shufflevector <4 x i32> [[TMP22]], <4 x i32> [[TMP42]], <4 x i32> @@ -4582,17 +4570,12 @@ ; UNROLL-NO-IC-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; UNROLL-NO-IC-NEXT: [[DOTSPLATINSERT:%.*]] = 
insertelement <4 x i32> poison, i32 [[Y]], i32 0 -; UNROLL-NO-IC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; UNROLL-NO-IC-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE19:%.*]] ] -; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UDIV_CONTINUE19]] ] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_UDIV_CONTINUE19]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP46:%.*]], [[PRED_UDIV_CONTINUE19]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP47:%.*]], [[PRED_UDIV_CONTINUE19]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]] ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT3]], <4 x i32> poison, <4 x i32> zeroinitializer @@ -4611,66 +4594,66 @@ ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_UDIV_IF]] ] ; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1 ; UNROLL-NO-IC-NEXT: br i1 [[TMP9]], label [[PRED_UDIV_IF6:%.*]], label [[PRED_UDIV_CONTINUE7:%.*]] -; UNROLL-NO-IC: pred.udiv.if6: +; UNROLL-NO-IC: pred.udiv.if5: ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = add i32 [[OFFSET_IDX]], -1 ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = udiv i32 219220132, [[TMP10]] ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP11]], i32 1 ; UNROLL-NO-IC-NEXT: br label 
[[PRED_UDIV_CONTINUE7]] -; UNROLL-NO-IC: pred.udiv.continue7: +; UNROLL-NO-IC: pred.udiv.continue6: ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP12]], [[PRED_UDIV_IF6]] ] ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2 ; UNROLL-NO-IC-NEXT: br i1 [[TMP14]], label [[PRED_UDIV_IF8:%.*]], label [[PRED_UDIV_CONTINUE9:%.*]] -; UNROLL-NO-IC: pred.udiv.if8: +; UNROLL-NO-IC: pred.udiv.if7: ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = add i32 [[OFFSET_IDX]], -2 ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = udiv i32 219220132, [[TMP15]] ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP16]], i32 2 ; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE9]] -; UNROLL-NO-IC: pred.udiv.continue9: +; UNROLL-NO-IC: pred.udiv.continue8: ; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP13]], [[PRED_UDIV_CONTINUE7]] ], [ [[TMP17]], [[PRED_UDIV_IF8]] ] ; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3 ; UNROLL-NO-IC-NEXT: br i1 [[TMP19]], label [[PRED_UDIV_IF10:%.*]], label [[PRED_UDIV_CONTINUE11:%.*]] -; UNROLL-NO-IC: pred.udiv.if10: +; UNROLL-NO-IC: pred.udiv.if9: ; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = add i32 [[OFFSET_IDX]], -3 ; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = udiv i32 219220132, [[TMP20]] ; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP21]], i32 3 ; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE11]] -; UNROLL-NO-IC: pred.udiv.continue11: +; UNROLL-NO-IC: pred.udiv.continue10: ; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_UDIV_CONTINUE9]] ], [ [[TMP22]], [[PRED_UDIV_IF10]] ] ; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0 ; UNROLL-NO-IC-NEXT: br i1 [[TMP24]], label [[PRED_UDIV_IF12:%.*]], label [[PRED_UDIV_CONTINUE13:%.*]] -; UNROLL-NO-IC: pred.udiv.if12: +; UNROLL-NO-IC: pred.udiv.if11: ; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = add i32 [[OFFSET_IDX]], -4 ; UNROLL-NO-IC-NEXT: 
[[TMP26:%.*]] = udiv i32 219220132, [[TMP25]] ; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP26]], i32 0 ; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE13]] -; UNROLL-NO-IC: pred.udiv.continue13: +; UNROLL-NO-IC: pred.udiv.continue12: ; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ poison, [[PRED_UDIV_CONTINUE11]] ], [ [[TMP27]], [[PRED_UDIV_IF12]] ] ; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1 ; UNROLL-NO-IC-NEXT: br i1 [[TMP29]], label [[PRED_UDIV_IF14:%.*]], label [[PRED_UDIV_CONTINUE15:%.*]] -; UNROLL-NO-IC: pred.udiv.if14: +; UNROLL-NO-IC: pred.udiv.if13: ; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = add i32 [[OFFSET_IDX]], -5 ; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = udiv i32 219220132, [[TMP30]] ; UNROLL-NO-IC-NEXT: [[TMP32:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP31]], i32 1 ; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE15]] -; UNROLL-NO-IC: pred.udiv.continue15: +; UNROLL-NO-IC: pred.udiv.continue14: ; UNROLL-NO-IC-NEXT: [[TMP33:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_UDIV_CONTINUE13]] ], [ [[TMP32]], [[PRED_UDIV_IF14]] ] ; UNROLL-NO-IC-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2 ; UNROLL-NO-IC-NEXT: br i1 [[TMP34]], label [[PRED_UDIV_IF16:%.*]], label [[PRED_UDIV_CONTINUE17:%.*]] -; UNROLL-NO-IC: pred.udiv.if16: +; UNROLL-NO-IC: pred.udiv.if15: ; UNROLL-NO-IC-NEXT: [[TMP35:%.*]] = add i32 [[OFFSET_IDX]], -6 ; UNROLL-NO-IC-NEXT: [[TMP36:%.*]] = udiv i32 219220132, [[TMP35]] ; UNROLL-NO-IC-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP33]], i32 [[TMP36]], i32 2 ; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE17]] -; UNROLL-NO-IC: pred.udiv.continue17: +; UNROLL-NO-IC: pred.udiv.continue16: ; UNROLL-NO-IC-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP33]], [[PRED_UDIV_CONTINUE15]] ], [ [[TMP37]], [[PRED_UDIV_IF16]] ] ; UNROLL-NO-IC-NEXT: [[TMP39:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3 ; UNROLL-NO-IC-NEXT: br i1 [[TMP39]], label [[PRED_UDIV_IF18:%.*]], 
label [[PRED_UDIV_CONTINUE19]] -; UNROLL-NO-IC: pred.udiv.if18: +; UNROLL-NO-IC: pred.udiv.if17: ; UNROLL-NO-IC-NEXT: [[TMP40:%.*]] = add i32 [[OFFSET_IDX]], -7 ; UNROLL-NO-IC-NEXT: [[TMP41:%.*]] = udiv i32 219220132, [[TMP40]] ; UNROLL-NO-IC-NEXT: [[TMP42:%.*]] = insertelement <4 x i32> [[TMP38]], i32 [[TMP41]], i32 3 ; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE19]] -; UNROLL-NO-IC: pred.udiv.continue19: +; UNROLL-NO-IC: pred.udiv.continue18: ; UNROLL-NO-IC-NEXT: [[TMP43]] = phi <4 x i32> [ [[TMP38]], [[PRED_UDIV_CONTINUE17]] ], [ [[TMP42]], [[PRED_UDIV_IF18]] ] ; UNROLL-NO-IC-NEXT: [[TMP44:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP23]], <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP45:%.*]] = shufflevector <4 x i32> [[TMP23]], <4 x i32> [[TMP43]], <4 x i32> @@ -4679,7 +4662,6 @@ ; UNROLL-NO-IC-NEXT: [[TMP48:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP46]], <4 x i32> [[VEC_PHI]] ; UNROLL-NO-IC-NEXT: [[TMP49:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP47]], <4 x i32> [[VEC_PHI2]] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], ; UNROLL-NO-IC-NEXT: [[TMP50:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP50]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF52:![0-9]+]], !llvm.loop [[LOOP53:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -4785,13 +4767,9 @@ ; SINK-AFTER-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1 ; SINK-AFTER-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0 ; SINK-AFTER-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; SINK-AFTER-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i32 0 -; SINK-AFTER-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; SINK-AFTER-NEXT: 
[[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], ; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]] ; SINK-AFTER: vector.body: ; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE8:%.*]] ] -; SINK-AFTER-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UDIV_CONTINUE8]] ] ; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[PRED_UDIV_CONTINUE8]] ] ; SINK-AFTER-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[PRED_UDIV_CONTINUE8]] ] ; SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]] @@ -4839,7 +4817,6 @@ ; SINK-AFTER-NEXT: [[TMP24]] = add <4 x i32> [[VEC_PHI]], [[TMP23]] ; SINK-AFTER-NEXT: [[TMP25:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP24]], <4 x i32> [[VEC_PHI]] ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], ; SINK-AFTER-NEXT: [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF52:![0-9]+]], !llvm.loop [[LOOP53:![0-9]+]] ; SINK-AFTER: middle.block: @@ -4918,27 +4895,27 @@ ; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_UDIV_IF]] ] ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP5]], i64 1 ; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_UDIV_IF4:%.*]], label [[PRED_UDIV_CONTINUE5:%.*]] -; CHECK: pred.udiv.if4: +; CHECK: pred.udiv.if2: ; CHECK-NEXT: [[TMP11:%.*]] = udiv i32 219220132, [[TMP2]] ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP11]], i64 1 ; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE5]] -; CHECK: pred.udiv.continue5: +; CHECK: pred.udiv.continue3: ; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP12]], [[PRED_UDIV_IF4]] ] ; CHECK-NEXT: [[TMP14:%.*]] = 
extractelement <4 x i1> [[TMP5]], i64 2 ; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_UDIV_IF6:%.*]], label [[PRED_UDIV_CONTINUE7:%.*]] -; CHECK: pred.udiv.if6: +; CHECK: pred.udiv.if4: ; CHECK-NEXT: [[TMP15:%.*]] = udiv i32 219220132, [[TMP3]] ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP15]], i64 2 ; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE7]] -; CHECK: pred.udiv.continue7: +; CHECK: pred.udiv.continue5: ; CHECK-NEXT: [[TMP17:%.*]] = phi <4 x i32> [ [[TMP13]], [[PRED_UDIV_CONTINUE5]] ], [ [[TMP16]], [[PRED_UDIV_IF6]] ] ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP5]], i64 3 ; CHECK-NEXT: br i1 [[TMP18]], label [[PRED_UDIV_IF8:%.*]], label [[PRED_UDIV_CONTINUE9:%.*]] -; CHECK: pred.udiv.if8: +; CHECK: pred.udiv.if6: ; CHECK-NEXT: [[TMP19:%.*]] = udiv i32 219220132, [[TMP4]] ; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP19]], i64 3 ; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE9]] -; CHECK: pred.udiv.continue9: +; CHECK: pred.udiv.continue7: ; CHECK-NEXT: [[TMP21]] = phi <4 x i32> [ [[TMP17]], [[PRED_UDIV_CONTINUE7]] ], [ [[TMP20]], [[PRED_UDIV_IF8]] ] ; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP21]], <4 x i32> ; CHECK-NEXT: [[TMP23]] = add <4 x i32> [[VEC_PHI]], [[TMP22]] @@ -4952,31 +4929,31 @@ ; CHECK: pred.store.continue: ; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i1> [[TMP5]], i64 1 ; CHECK-NEXT: br i1 [[TMP27]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]] -; CHECK: pred.store.if10: +; CHECK: pred.store.if8: ; CHECK-NEXT: [[TMP28:%.*]] = or i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64 ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP29]] ; CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP30]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE11]] -; CHECK: pred.store.continue11: +; CHECK: pred.store.continue9: ; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> 
[[TMP5]], i64 2 ; CHECK-NEXT: br i1 [[TMP31]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]] -; CHECK: pred.store.if12: +; CHECK: pred.store.if10: ; CHECK-NEXT: [[TMP32:%.*]] = or i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP33:%.*]] = sext i32 [[TMP32]] to i64 ; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP33]] ; CHECK-NEXT: store i32 [[TMP3]], i32* [[TMP34]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE13]] -; CHECK: pred.store.continue13: +; CHECK: pred.store.continue11: ; CHECK-NEXT: [[TMP35:%.*]] = extractelement <4 x i1> [[TMP5]], i64 3 ; CHECK-NEXT: br i1 [[TMP35]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15]] -; CHECK: pred.store.if14: +; CHECK: pred.store.if12: ; CHECK-NEXT: [[TMP36:%.*]] = or i32 [[INDEX]], 3 ; CHECK-NEXT: [[TMP37:%.*]] = sext i32 [[TMP36]] to i64 ; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP37]] ; CHECK-NEXT: store i32 [[TMP4]], i32* [[TMP38]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE15]] -; CHECK: pred.store.continue15: +; CHECK: pred.store.continue13: ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], ; CHECK-NEXT: [[TMP39:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] @@ -5012,6 +4989,7 @@ ; UNROLL-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_STORE_CONTINUE35]] ] ; UNROLL-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP45:%.*]], [[PRED_STORE_CONTINUE35]] ] ; UNROLL-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP46:%.*]], [[PRED_STORE_CONTINUE35]] ] +; UNROLL-NEXT: [[STEP_ADD4:%.*]] = add <4 x i32> [[VEC_IND3]], ; UNROLL-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]] ; UNROLL-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], -1 ; UNROLL-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], -2 @@ -5020,7 +4998,6 @@ ; UNROLL-NEXT: [[TMP6:%.*]] = add i32 
[[OFFSET_IDX]], -5 ; UNROLL-NEXT: [[TMP7:%.*]] = add i32 [[OFFSET_IDX]], -6 ; UNROLL-NEXT: [[TMP8:%.*]] = add i32 [[OFFSET_IDX]], -7 -; UNROLL-NEXT: [[STEP_ADD4:%.*]] = add <4 x i32> [[VEC_IND3]], ; UNROLL-NEXT: [[TMP9:%.*]] = icmp ule <4 x i32> [[VEC_IND3]], [[BROADCAST_SPLAT]] ; UNROLL-NEXT: [[TMP10:%.*]] = icmp ule <4 x i32> [[STEP_ADD4]], [[BROADCAST_SPLAT]] ; UNROLL-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP9]], i64 0 @@ -5033,59 +5010,59 @@ ; UNROLL-NEXT: [[TMP14:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP13]], [[PRED_UDIV_IF]] ] ; UNROLL-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP9]], i64 1 ; UNROLL-NEXT: br i1 [[TMP15]], label [[PRED_UDIV_IF8:%.*]], label [[PRED_UDIV_CONTINUE9:%.*]] -; UNROLL: pred.udiv.if8: +; UNROLL: pred.udiv.if4: ; UNROLL-NEXT: [[TMP16:%.*]] = udiv i32 219220132, [[TMP2]] ; UNROLL-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[TMP16]], i64 1 ; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE9]] -; UNROLL: pred.udiv.continue9: +; UNROLL: pred.udiv.continue5: ; UNROLL-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP14]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP17]], [[PRED_UDIV_IF8]] ] ; UNROLL-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[TMP9]], i64 2 ; UNROLL-NEXT: br i1 [[TMP19]], label [[PRED_UDIV_IF10:%.*]], label [[PRED_UDIV_CONTINUE11:%.*]] -; UNROLL: pred.udiv.if10: +; UNROLL: pred.udiv.if6: ; UNROLL-NEXT: [[TMP20:%.*]] = udiv i32 219220132, [[TMP3]] ; UNROLL-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP20]], i64 2 ; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE11]] -; UNROLL: pred.udiv.continue11: +; UNROLL: pred.udiv.continue7: ; UNROLL-NEXT: [[TMP22:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_UDIV_CONTINUE9]] ], [ [[TMP21]], [[PRED_UDIV_IF10]] ] ; UNROLL-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[TMP9]], i64 3 ; UNROLL-NEXT: br i1 [[TMP23]], label [[PRED_UDIV_IF12:%.*]], label [[PRED_UDIV_CONTINUE13:%.*]] -; UNROLL: pred.udiv.if12: +; UNROLL: pred.udiv.if8: ; UNROLL-NEXT: 
[[TMP24:%.*]] = udiv i32 219220132, [[TMP4]] ; UNROLL-NEXT: [[TMP25:%.*]] = insertelement <4 x i32> [[TMP22]], i32 [[TMP24]], i64 3 ; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE13]] -; UNROLL: pred.udiv.continue13: +; UNROLL: pred.udiv.continue9: ; UNROLL-NEXT: [[TMP26:%.*]] = phi <4 x i32> [ [[TMP22]], [[PRED_UDIV_CONTINUE11]] ], [ [[TMP25]], [[PRED_UDIV_IF12]] ] ; UNROLL-NEXT: [[TMP27:%.*]] = extractelement <4 x i1> [[TMP10]], i64 0 ; UNROLL-NEXT: br i1 [[TMP27]], label [[PRED_UDIV_IF14:%.*]], label [[PRED_UDIV_CONTINUE15:%.*]] -; UNROLL: pred.udiv.if14: +; UNROLL: pred.udiv.if10: ; UNROLL-NEXT: [[TMP28:%.*]] = udiv i32 219220132, [[TMP5]] ; UNROLL-NEXT: [[TMP29:%.*]] = insertelement <4 x i32> poison, i32 [[TMP28]], i64 0 ; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE15]] -; UNROLL: pred.udiv.continue15: +; UNROLL: pred.udiv.continue11: ; UNROLL-NEXT: [[TMP30:%.*]] = phi <4 x i32> [ poison, [[PRED_UDIV_CONTINUE13]] ], [ [[TMP29]], [[PRED_UDIV_IF14]] ] ; UNROLL-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[TMP10]], i64 1 ; UNROLL-NEXT: br i1 [[TMP31]], label [[PRED_UDIV_IF16:%.*]], label [[PRED_UDIV_CONTINUE17:%.*]] -; UNROLL: pred.udiv.if16: +; UNROLL: pred.udiv.if12: ; UNROLL-NEXT: [[TMP32:%.*]] = udiv i32 219220132, [[TMP6]] ; UNROLL-NEXT: [[TMP33:%.*]] = insertelement <4 x i32> [[TMP30]], i32 [[TMP32]], i64 1 ; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE17]] -; UNROLL: pred.udiv.continue17: +; UNROLL: pred.udiv.continue13: ; UNROLL-NEXT: [[TMP34:%.*]] = phi <4 x i32> [ [[TMP30]], [[PRED_UDIV_CONTINUE15]] ], [ [[TMP33]], [[PRED_UDIV_IF16]] ] ; UNROLL-NEXT: [[TMP35:%.*]] = extractelement <4 x i1> [[TMP10]], i64 2 ; UNROLL-NEXT: br i1 [[TMP35]], label [[PRED_UDIV_IF18:%.*]], label [[PRED_UDIV_CONTINUE19:%.*]] -; UNROLL: pred.udiv.if18: +; UNROLL: pred.udiv.if14: ; UNROLL-NEXT: [[TMP36:%.*]] = udiv i32 219220132, [[TMP7]] ; UNROLL-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP34]], i32 [[TMP36]], i64 2 ; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE19]] -; UNROLL: 
pred.udiv.continue19: +; UNROLL: pred.udiv.continue15: ; UNROLL-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP34]], [[PRED_UDIV_CONTINUE17]] ], [ [[TMP37]], [[PRED_UDIV_IF18]] ] ; UNROLL-NEXT: [[TMP39:%.*]] = extractelement <4 x i1> [[TMP10]], i64 3 ; UNROLL-NEXT: br i1 [[TMP39]], label [[PRED_UDIV_IF20:%.*]], label [[PRED_UDIV_CONTINUE21:%.*]] -; UNROLL: pred.udiv.if20: +; UNROLL: pred.udiv.if16: ; UNROLL-NEXT: [[TMP40:%.*]] = udiv i32 219220132, [[TMP8]] ; UNROLL-NEXT: [[TMP41:%.*]] = insertelement <4 x i32> [[TMP38]], i32 [[TMP40]], i64 3 ; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE21]] -; UNROLL: pred.udiv.continue21: +; UNROLL: pred.udiv.continue17: ; UNROLL-NEXT: [[TMP42]] = phi <4 x i32> [ [[TMP38]], [[PRED_UDIV_CONTINUE19]] ], [ [[TMP41]], [[PRED_UDIV_IF20]] ] ; UNROLL-NEXT: [[TMP43:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP26]], <4 x i32> ; UNROLL-NEXT: [[TMP44:%.*]] = shufflevector <4 x i32> [[TMP26]], <4 x i32> [[TMP42]], <4 x i32> @@ -5101,67 +5078,67 @@ ; UNROLL: pred.store.continue: ; UNROLL-NEXT: [[TMP50:%.*]] = extractelement <4 x i1> [[TMP9]], i64 1 ; UNROLL-NEXT: br i1 [[TMP50]], label [[PRED_STORE_IF22:%.*]], label [[PRED_STORE_CONTINUE23:%.*]] -; UNROLL: pred.store.if22: +; UNROLL: pred.store.if18: ; UNROLL-NEXT: [[TMP51:%.*]] = or i32 [[INDEX]], 1 ; UNROLL-NEXT: [[TMP52:%.*]] = sext i32 [[TMP51]] to i64 ; UNROLL-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP52]] ; UNROLL-NEXT: store i32 [[TMP2]], i32* [[TMP53]], align 4 ; UNROLL-NEXT: br label [[PRED_STORE_CONTINUE23]] -; UNROLL: pred.store.continue23: +; UNROLL: pred.store.continue19: ; UNROLL-NEXT: [[TMP54:%.*]] = extractelement <4 x i1> [[TMP9]], i64 2 ; UNROLL-NEXT: br i1 [[TMP54]], label [[PRED_STORE_IF24:%.*]], label [[PRED_STORE_CONTINUE25:%.*]] -; UNROLL: pred.store.if24: +; UNROLL: pred.store.if20: ; UNROLL-NEXT: [[TMP55:%.*]] = or i32 [[INDEX]], 2 ; UNROLL-NEXT: [[TMP56:%.*]] = sext i32 [[TMP55]] to i64 ; UNROLL-NEXT: [[TMP57:%.*]] = 
getelementptr inbounds i32, i32* [[X]], i64 [[TMP56]] ; UNROLL-NEXT: store i32 [[TMP3]], i32* [[TMP57]], align 4 ; UNROLL-NEXT: br label [[PRED_STORE_CONTINUE25]] -; UNROLL: pred.store.continue25: +; UNROLL: pred.store.continue21: ; UNROLL-NEXT: [[TMP58:%.*]] = extractelement <4 x i1> [[TMP9]], i64 3 ; UNROLL-NEXT: br i1 [[TMP58]], label [[PRED_STORE_IF26:%.*]], label [[PRED_STORE_CONTINUE27:%.*]] -; UNROLL: pred.store.if26: +; UNROLL: pred.store.if22: ; UNROLL-NEXT: [[TMP59:%.*]] = or i32 [[INDEX]], 3 ; UNROLL-NEXT: [[TMP60:%.*]] = sext i32 [[TMP59]] to i64 ; UNROLL-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP60]] ; UNROLL-NEXT: store i32 [[TMP4]], i32* [[TMP61]], align 4 ; UNROLL-NEXT: br label [[PRED_STORE_CONTINUE27]] -; UNROLL: pred.store.continue27: +; UNROLL: pred.store.continue23: ; UNROLL-NEXT: [[TMP62:%.*]] = extractelement <4 x i1> [[TMP10]], i64 0 ; UNROLL-NEXT: br i1 [[TMP62]], label [[PRED_STORE_IF28:%.*]], label [[PRED_STORE_CONTINUE29:%.*]] -; UNROLL: pred.store.if28: +; UNROLL: pred.store.if24: ; UNROLL-NEXT: [[TMP63:%.*]] = or i32 [[INDEX]], 4 ; UNROLL-NEXT: [[TMP64:%.*]] = sext i32 [[TMP63]] to i64 ; UNROLL-NEXT: [[TMP65:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP64]] ; UNROLL-NEXT: store i32 [[TMP5]], i32* [[TMP65]], align 4 ; UNROLL-NEXT: br label [[PRED_STORE_CONTINUE29]] -; UNROLL: pred.store.continue29: +; UNROLL: pred.store.continue25: ; UNROLL-NEXT: [[TMP66:%.*]] = extractelement <4 x i1> [[TMP10]], i64 1 ; UNROLL-NEXT: br i1 [[TMP66]], label [[PRED_STORE_IF30:%.*]], label [[PRED_STORE_CONTINUE31:%.*]] -; UNROLL: pred.store.if30: +; UNROLL: pred.store.if26: ; UNROLL-NEXT: [[TMP67:%.*]] = or i32 [[INDEX]], 5 ; UNROLL-NEXT: [[TMP68:%.*]] = sext i32 [[TMP67]] to i64 ; UNROLL-NEXT: [[TMP69:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP68]] ; UNROLL-NEXT: store i32 [[TMP6]], i32* [[TMP69]], align 4 ; UNROLL-NEXT: br label [[PRED_STORE_CONTINUE31]] -; UNROLL: pred.store.continue31: +; UNROLL: 
pred.store.continue27: ; UNROLL-NEXT: [[TMP70:%.*]] = extractelement <4 x i1> [[TMP10]], i64 2 ; UNROLL-NEXT: br i1 [[TMP70]], label [[PRED_STORE_IF32:%.*]], label [[PRED_STORE_CONTINUE33:%.*]] -; UNROLL: pred.store.if32: +; UNROLL: pred.store.if28: ; UNROLL-NEXT: [[TMP71:%.*]] = or i32 [[INDEX]], 6 ; UNROLL-NEXT: [[TMP72:%.*]] = sext i32 [[TMP71]] to i64 ; UNROLL-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP72]] ; UNROLL-NEXT: store i32 [[TMP7]], i32* [[TMP73]], align 4 ; UNROLL-NEXT: br label [[PRED_STORE_CONTINUE33]] -; UNROLL: pred.store.continue33: +; UNROLL: pred.store.continue29: ; UNROLL-NEXT: [[TMP74:%.*]] = extractelement <4 x i1> [[TMP10]], i64 3 ; UNROLL-NEXT: br i1 [[TMP74]], label [[PRED_STORE_IF34:%.*]], label [[PRED_STORE_CONTINUE35]] -; UNROLL: pred.store.if34: +; UNROLL: pred.store.if30: ; UNROLL-NEXT: [[TMP75:%.*]] = or i32 [[INDEX]], 7 ; UNROLL-NEXT: [[TMP76:%.*]] = sext i32 [[TMP75]] to i64 ; UNROLL-NEXT: [[TMP77:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP76]] ; UNROLL-NEXT: store i32 [[TMP8]], i32* [[TMP77]], align 4 ; UNROLL-NEXT: br label [[PRED_STORE_CONTINUE35]] -; UNROLL: pred.store.continue35: +; UNROLL: pred.store.continue31: ; UNROLL-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; UNROLL-NEXT: [[VEC_IND_NEXT6]] = add <4 x i32> [[VEC_IND3]], ; UNROLL-NEXT: [[TMP78:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] @@ -5194,18 +5171,14 @@ ; UNROLL-NO-IC-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; UNROLL-NO-IC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i32 0 -; UNROLL-NO-IC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; UNROLL-NO-IC-NEXT: 
[[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE35:%.*]] ] -; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE35]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND3:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[PRED_STORE_CONTINUE35]] ] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_STORE_CONTINUE35]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP46:%.*]], [[PRED_STORE_CONTINUE35]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP47:%.*]], [[PRED_STORE_CONTINUE35]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], +; UNROLL-NO-IC-NEXT: [[STEP_ADD4:%.*]] = add <4 x i32> [[VEC_IND3]], ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]] ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 0 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], -1 @@ -5215,7 +5188,6 @@ ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i32 [[OFFSET_IDX]], -5 ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add i32 [[OFFSET_IDX]], -6 ; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = add i32 [[OFFSET_IDX]], -7 -; UNROLL-NO-IC-NEXT: [[STEP_ADD4:%.*]] = add <4 x i32> [[VEC_IND3]], ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = icmp ule <4 x i32> [[VEC_IND3]], [[BROADCAST_SPLAT]] ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = icmp ule <4 x i32> [[STEP_ADD4]], [[BROADCAST_SPLAT]] ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP10]], i32 0 @@ -5228,59 +5200,59 @@ ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP14]], [[PRED_UDIV_IF]] ] ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP10]], i32 1 ; 
UNROLL-NO-IC-NEXT: br i1 [[TMP16]], label [[PRED_UDIV_IF8:%.*]], label [[PRED_UDIV_CONTINUE9:%.*]] -; UNROLL-NO-IC: pred.udiv.if8: +; UNROLL-NO-IC: pred.udiv.if4: ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = udiv i32 219220132, [[TMP3]] ; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = insertelement <4 x i32> [[TMP15]], i32 [[TMP17]], i32 1 ; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE9]] -; UNROLL-NO-IC: pred.udiv.continue9: +; UNROLL-NO-IC: pred.udiv.continue5: ; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP15]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP18]], [[PRED_UDIV_IF8]] ] ; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP10]], i32 2 ; UNROLL-NO-IC-NEXT: br i1 [[TMP20]], label [[PRED_UDIV_IF10:%.*]], label [[PRED_UDIV_CONTINUE11:%.*]] -; UNROLL-NO-IC: pred.udiv.if10: +; UNROLL-NO-IC: pred.udiv.if6: ; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = udiv i32 219220132, [[TMP4]] ; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP21]], i32 2 ; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE11]] -; UNROLL-NO-IC: pred.udiv.continue11: +; UNROLL-NO-IC: pred.udiv.continue7: ; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_UDIV_CONTINUE9]] ], [ [[TMP22]], [[PRED_UDIV_IF10]] ] ; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP10]], i32 3 ; UNROLL-NO-IC-NEXT: br i1 [[TMP24]], label [[PRED_UDIV_IF12:%.*]], label [[PRED_UDIV_CONTINUE13:%.*]] -; UNROLL-NO-IC: pred.udiv.if12: +; UNROLL-NO-IC: pred.udiv.if8: ; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = udiv i32 219220132, [[TMP5]] ; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = insertelement <4 x i32> [[TMP23]], i32 [[TMP25]], i32 3 ; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE13]] -; UNROLL-NO-IC: pred.udiv.continue13: +; UNROLL-NO-IC: pred.udiv.continue9: ; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = phi <4 x i32> [ [[TMP23]], [[PRED_UDIV_CONTINUE11]] ], [ [[TMP26]], [[PRED_UDIV_IF12]] ] ; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP11]], i32 0 ; UNROLL-NO-IC-NEXT: 
br i1 [[TMP28]], label [[PRED_UDIV_IF14:%.*]], label [[PRED_UDIV_CONTINUE15:%.*]] -; UNROLL-NO-IC: pred.udiv.if14: +; UNROLL-NO-IC: pred.udiv.if10: ; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = udiv i32 219220132, [[TMP6]] ; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = insertelement <4 x i32> poison, i32 [[TMP29]], i32 0 ; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE15]] -; UNROLL-NO-IC: pred.udiv.continue15: +; UNROLL-NO-IC: pred.udiv.continue11: ; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = phi <4 x i32> [ poison, [[PRED_UDIV_CONTINUE13]] ], [ [[TMP30]], [[PRED_UDIV_IF14]] ] ; UNROLL-NO-IC-NEXT: [[TMP32:%.*]] = extractelement <4 x i1> [[TMP11]], i32 1 ; UNROLL-NO-IC-NEXT: br i1 [[TMP32]], label [[PRED_UDIV_IF16:%.*]], label [[PRED_UDIV_CONTINUE17:%.*]] -; UNROLL-NO-IC: pred.udiv.if16: +; UNROLL-NO-IC: pred.udiv.if12: ; UNROLL-NO-IC-NEXT: [[TMP33:%.*]] = udiv i32 219220132, [[TMP7]] ; UNROLL-NO-IC-NEXT: [[TMP34:%.*]] = insertelement <4 x i32> [[TMP31]], i32 [[TMP33]], i32 1 ; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE17]] -; UNROLL-NO-IC: pred.udiv.continue17: +; UNROLL-NO-IC: pred.udiv.continue13: ; UNROLL-NO-IC-NEXT: [[TMP35:%.*]] = phi <4 x i32> [ [[TMP31]], [[PRED_UDIV_CONTINUE15]] ], [ [[TMP34]], [[PRED_UDIV_IF16]] ] ; UNROLL-NO-IC-NEXT: [[TMP36:%.*]] = extractelement <4 x i1> [[TMP11]], i32 2 ; UNROLL-NO-IC-NEXT: br i1 [[TMP36]], label [[PRED_UDIV_IF18:%.*]], label [[PRED_UDIV_CONTINUE19:%.*]] -; UNROLL-NO-IC: pred.udiv.if18: +; UNROLL-NO-IC: pred.udiv.if14: ; UNROLL-NO-IC-NEXT: [[TMP37:%.*]] = udiv i32 219220132, [[TMP8]] ; UNROLL-NO-IC-NEXT: [[TMP38:%.*]] = insertelement <4 x i32> [[TMP35]], i32 [[TMP37]], i32 2 ; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE19]] -; UNROLL-NO-IC: pred.udiv.continue19: +; UNROLL-NO-IC: pred.udiv.continue15: ; UNROLL-NO-IC-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP35]], [[PRED_UDIV_CONTINUE17]] ], [ [[TMP38]], [[PRED_UDIV_IF18]] ] ; UNROLL-NO-IC-NEXT: [[TMP40:%.*]] = extractelement <4 x i1> [[TMP11]], i32 3 ; UNROLL-NO-IC-NEXT: br i1 
[[TMP40]], label [[PRED_UDIV_IF20:%.*]], label [[PRED_UDIV_CONTINUE21:%.*]] -; UNROLL-NO-IC: pred.udiv.if20: +; UNROLL-NO-IC: pred.udiv.if16: ; UNROLL-NO-IC-NEXT: [[TMP41:%.*]] = udiv i32 219220132, [[TMP9]] ; UNROLL-NO-IC-NEXT: [[TMP42:%.*]] = insertelement <4 x i32> [[TMP39]], i32 [[TMP41]], i32 3 ; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE21]] -; UNROLL-NO-IC: pred.udiv.continue21: +; UNROLL-NO-IC: pred.udiv.continue17: ; UNROLL-NO-IC-NEXT: [[TMP43]] = phi <4 x i32> [ [[TMP39]], [[PRED_UDIV_CONTINUE19]] ], [ [[TMP42]], [[PRED_UDIV_IF20]] ] ; UNROLL-NO-IC-NEXT: [[TMP44:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP27]], <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP45:%.*]] = shufflevector <4 x i32> [[TMP27]], <4 x i32> [[TMP43]], <4 x i32> @@ -5296,64 +5268,63 @@ ; UNROLL-NO-IC: pred.store.continue: ; UNROLL-NO-IC-NEXT: [[TMP51:%.*]] = extractelement <4 x i1> [[TMP10]], i32 1 ; UNROLL-NO-IC-NEXT: br i1 [[TMP51]], label [[PRED_STORE_IF22:%.*]], label [[PRED_STORE_CONTINUE23:%.*]] -; UNROLL-NO-IC: pred.store.if22: +; UNROLL-NO-IC: pred.store.if18: ; UNROLL-NO-IC-NEXT: [[TMP52:%.*]] = add i32 [[INDEX]], 1 ; UNROLL-NO-IC-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP52]] ; UNROLL-NO-IC-NEXT: store i32 [[TMP3]], i32* [[TMP53]], align 4 ; UNROLL-NO-IC-NEXT: br label [[PRED_STORE_CONTINUE23]] -; UNROLL-NO-IC: pred.store.continue23: +; UNROLL-NO-IC: pred.store.continue19: ; UNROLL-NO-IC-NEXT: [[TMP54:%.*]] = extractelement <4 x i1> [[TMP10]], i32 2 ; UNROLL-NO-IC-NEXT: br i1 [[TMP54]], label [[PRED_STORE_IF24:%.*]], label [[PRED_STORE_CONTINUE25:%.*]] -; UNROLL-NO-IC: pred.store.if24: +; UNROLL-NO-IC: pred.store.if20: ; UNROLL-NO-IC-NEXT: [[TMP55:%.*]] = add i32 [[INDEX]], 2 ; UNROLL-NO-IC-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP55]] ; UNROLL-NO-IC-NEXT: store i32 [[TMP4]], i32* [[TMP56]], align 4 ; UNROLL-NO-IC-NEXT: br label [[PRED_STORE_CONTINUE25]] -; UNROLL-NO-IC: pred.store.continue25: +; 
UNROLL-NO-IC: pred.store.continue21: ; UNROLL-NO-IC-NEXT: [[TMP57:%.*]] = extractelement <4 x i1> [[TMP10]], i32 3 ; UNROLL-NO-IC-NEXT: br i1 [[TMP57]], label [[PRED_STORE_IF26:%.*]], label [[PRED_STORE_CONTINUE27:%.*]] -; UNROLL-NO-IC: pred.store.if26: +; UNROLL-NO-IC: pred.store.if22: ; UNROLL-NO-IC-NEXT: [[TMP58:%.*]] = add i32 [[INDEX]], 3 ; UNROLL-NO-IC-NEXT: [[TMP59:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP58]] ; UNROLL-NO-IC-NEXT: store i32 [[TMP5]], i32* [[TMP59]], align 4 ; UNROLL-NO-IC-NEXT: br label [[PRED_STORE_CONTINUE27]] -; UNROLL-NO-IC: pred.store.continue27: +; UNROLL-NO-IC: pred.store.continue23: ; UNROLL-NO-IC-NEXT: [[TMP60:%.*]] = extractelement <4 x i1> [[TMP11]], i32 0 ; UNROLL-NO-IC-NEXT: br i1 [[TMP60]], label [[PRED_STORE_IF28:%.*]], label [[PRED_STORE_CONTINUE29:%.*]] -; UNROLL-NO-IC: pred.store.if28: +; UNROLL-NO-IC: pred.store.if24: ; UNROLL-NO-IC-NEXT: [[TMP61:%.*]] = add i32 [[INDEX]], 4 ; UNROLL-NO-IC-NEXT: [[TMP62:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP61]] ; UNROLL-NO-IC-NEXT: store i32 [[TMP6]], i32* [[TMP62]], align 4 ; UNROLL-NO-IC-NEXT: br label [[PRED_STORE_CONTINUE29]] -; UNROLL-NO-IC: pred.store.continue29: +; UNROLL-NO-IC: pred.store.continue25: ; UNROLL-NO-IC-NEXT: [[TMP63:%.*]] = extractelement <4 x i1> [[TMP11]], i32 1 ; UNROLL-NO-IC-NEXT: br i1 [[TMP63]], label [[PRED_STORE_IF30:%.*]], label [[PRED_STORE_CONTINUE31:%.*]] -; UNROLL-NO-IC: pred.store.if30: +; UNROLL-NO-IC: pred.store.if26: ; UNROLL-NO-IC-NEXT: [[TMP64:%.*]] = add i32 [[INDEX]], 5 ; UNROLL-NO-IC-NEXT: [[TMP65:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP64]] ; UNROLL-NO-IC-NEXT: store i32 [[TMP7]], i32* [[TMP65]], align 4 ; UNROLL-NO-IC-NEXT: br label [[PRED_STORE_CONTINUE31]] -; UNROLL-NO-IC: pred.store.continue31: +; UNROLL-NO-IC: pred.store.continue27: ; UNROLL-NO-IC-NEXT: [[TMP66:%.*]] = extractelement <4 x i1> [[TMP11]], i32 2 ; UNROLL-NO-IC-NEXT: br i1 [[TMP66]], label [[PRED_STORE_IF32:%.*]], label 
[[PRED_STORE_CONTINUE33:%.*]] -; UNROLL-NO-IC: pred.store.if32: +; UNROLL-NO-IC: pred.store.if28: ; UNROLL-NO-IC-NEXT: [[TMP67:%.*]] = add i32 [[INDEX]], 6 ; UNROLL-NO-IC-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP67]] ; UNROLL-NO-IC-NEXT: store i32 [[TMP8]], i32* [[TMP68]], align 4 ; UNROLL-NO-IC-NEXT: br label [[PRED_STORE_CONTINUE33]] -; UNROLL-NO-IC: pred.store.continue33: +; UNROLL-NO-IC: pred.store.continue29: ; UNROLL-NO-IC-NEXT: [[TMP69:%.*]] = extractelement <4 x i1> [[TMP11]], i32 3 ; UNROLL-NO-IC-NEXT: br i1 [[TMP69]], label [[PRED_STORE_IF34:%.*]], label [[PRED_STORE_CONTINUE35]] -; UNROLL-NO-IC: pred.store.if34: +; UNROLL-NO-IC: pred.store.if30: ; UNROLL-NO-IC-NEXT: [[TMP70:%.*]] = add i32 [[INDEX]], 7 ; UNROLL-NO-IC-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP70]] ; UNROLL-NO-IC-NEXT: store i32 [[TMP9]], i32* [[TMP71]], align 4 ; UNROLL-NO-IC-NEXT: br label [[PRED_STORE_CONTINUE35]] -; UNROLL-NO-IC: pred.store.continue35: +; UNROLL-NO-IC: pred.store.continue31: ; UNROLL-NO-IC-NEXT: [[TMP72:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP46]], <4 x i32> [[VEC_PHI]] ; UNROLL-NO-IC-NEXT: [[TMP73:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP47]], <4 x i32> [[VEC_PHI7]] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT6]] = add <4 x i32> [[STEP_ADD4]], ; UNROLL-NO-IC-NEXT: [[TMP74:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP74]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF52]], !llvm.loop [[LOOP56:![0-9]+]] @@ -5484,13 +5455,9 @@ ; SINK-AFTER-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1 ; SINK-AFTER-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0 ; SINK-AFTER-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x 
i32> zeroinitializer -; SINK-AFTER-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i32 0 -; SINK-AFTER-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; SINK-AFTER-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], ; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]] ; SINK-AFTER: vector.body: ; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE15:%.*]] ] -; SINK-AFTER-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE15]] ] ; SINK-AFTER-NEXT: [[VEC_IND2:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[PRED_STORE_CONTINUE15]] ] ; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[PRED_STORE_CONTINUE15]] ] ; SINK-AFTER-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[PRED_STORE_CONTINUE15]] ] @@ -5510,27 +5477,27 @@ ; SINK-AFTER-NEXT: [[TMP10:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP9]], [[PRED_UDIV_IF]] ] ; SINK-AFTER-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP6]], i32 1 ; SINK-AFTER-NEXT: br i1 [[TMP11]], label [[PRED_UDIV_IF4:%.*]], label [[PRED_UDIV_CONTINUE5:%.*]] -; SINK-AFTER: pred.udiv.if4: +; SINK-AFTER: pred.udiv.if2: ; SINK-AFTER-NEXT: [[TMP12:%.*]] = udiv i32 219220132, [[TMP3]] ; SINK-AFTER-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP12]], i32 1 ; SINK-AFTER-NEXT: br label [[PRED_UDIV_CONTINUE5]] -; SINK-AFTER: pred.udiv.continue5: +; SINK-AFTER: pred.udiv.continue3: ; SINK-AFTER-NEXT: [[TMP14:%.*]] = phi <4 x i32> [ [[TMP10]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP13]], [[PRED_UDIV_IF4]] ] ; SINK-AFTER-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP6]], i32 2 ; SINK-AFTER-NEXT: br i1 [[TMP15]], label [[PRED_UDIV_IF6:%.*]], label [[PRED_UDIV_CONTINUE7:%.*]] -; SINK-AFTER: pred.udiv.if6: +; 
SINK-AFTER: pred.udiv.if4: ; SINK-AFTER-NEXT: [[TMP16:%.*]] = udiv i32 219220132, [[TMP4]] ; SINK-AFTER-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[TMP16]], i32 2 ; SINK-AFTER-NEXT: br label [[PRED_UDIV_CONTINUE7]] -; SINK-AFTER: pred.udiv.continue7: +; SINK-AFTER: pred.udiv.continue5: ; SINK-AFTER-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP14]], [[PRED_UDIV_CONTINUE5]] ], [ [[TMP17]], [[PRED_UDIV_IF6]] ] ; SINK-AFTER-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[TMP6]], i32 3 ; SINK-AFTER-NEXT: br i1 [[TMP19]], label [[PRED_UDIV_IF8:%.*]], label [[PRED_UDIV_CONTINUE9:%.*]] -; SINK-AFTER: pred.udiv.if8: +; SINK-AFTER: pred.udiv.if6: ; SINK-AFTER-NEXT: [[TMP20:%.*]] = udiv i32 219220132, [[TMP5]] ; SINK-AFTER-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP20]], i32 3 ; SINK-AFTER-NEXT: br label [[PRED_UDIV_CONTINUE9]] -; SINK-AFTER: pred.udiv.continue9: +; SINK-AFTER: pred.udiv.continue7: ; SINK-AFTER-NEXT: [[TMP22]] = phi <4 x i32> [ [[TMP18]], [[PRED_UDIV_CONTINUE7]] ], [ [[TMP21]], [[PRED_UDIV_IF8]] ] ; SINK-AFTER-NEXT: [[TMP23:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP22]], <4 x i32> ; SINK-AFTER-NEXT: [[TMP24]] = add <4 x i32> [[VEC_PHI]], [[TMP23]] @@ -5544,31 +5511,30 @@ ; SINK-AFTER: pred.store.continue: ; SINK-AFTER-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP6]], i32 1 ; SINK-AFTER-NEXT: br i1 [[TMP28]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]] -; SINK-AFTER: pred.store.if10: +; SINK-AFTER: pred.store.if8: ; SINK-AFTER-NEXT: [[TMP29:%.*]] = add i32 [[INDEX]], 1 ; SINK-AFTER-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP29]] ; SINK-AFTER-NEXT: store i32 [[TMP3]], i32* [[TMP30]], align 4 ; SINK-AFTER-NEXT: br label [[PRED_STORE_CONTINUE11]] -; SINK-AFTER: pred.store.continue11: +; SINK-AFTER: pred.store.continue9: ; SINK-AFTER-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[TMP6]], i32 2 ; SINK-AFTER-NEXT: br i1 [[TMP31]], label 
[[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]] -; SINK-AFTER: pred.store.if12: +; SINK-AFTER: pred.store.if10: ; SINK-AFTER-NEXT: [[TMP32:%.*]] = add i32 [[INDEX]], 2 ; SINK-AFTER-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP32]] ; SINK-AFTER-NEXT: store i32 [[TMP4]], i32* [[TMP33]], align 4 ; SINK-AFTER-NEXT: br label [[PRED_STORE_CONTINUE13]] -; SINK-AFTER: pred.store.continue13: +; SINK-AFTER: pred.store.continue11: ; SINK-AFTER-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[TMP6]], i32 3 ; SINK-AFTER-NEXT: br i1 [[TMP34]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15]] -; SINK-AFTER: pred.store.if14: +; SINK-AFTER: pred.store.if12: ; SINK-AFTER-NEXT: [[TMP35:%.*]] = add i32 [[INDEX]], 3 ; SINK-AFTER-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP35]] ; SINK-AFTER-NEXT: store i32 [[TMP5]], i32* [[TMP36]], align 4 ; SINK-AFTER-NEXT: br label [[PRED_STORE_CONTINUE15]] -; SINK-AFTER: pred.store.continue15: +; SINK-AFTER: pred.store.continue13: ; SINK-AFTER-NEXT: [[TMP37:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[TMP24]], <4 x i32> [[VEC_PHI]] ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], ; SINK-AFTER-NEXT: [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], ; SINK-AFTER-NEXT: [[TMP38:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[TMP38]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF52]], !llvm.loop [[LOOP56:![0-9]+]] @@ -5690,13 +5656,7 @@ ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16 ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0 -; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i16 [[OFFSET_IDX]], 1 -; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], 2 -; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i16 [[OFFSET_IDX]], 3 ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i16 
[[OFFSET_IDX]], 4 -; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i16 [[OFFSET_IDX]], 5 -; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i16 [[OFFSET_IDX]], 6 -; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i16 [[OFFSET_IDX]], 7 ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add <4 x i16> [[VEC_IND]], ; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = add <4 x i16> [[STEP_ADD]], ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = or <4 x i16> [[TMP8]], [[TMP8]] @@ -5802,9 +5762,6 @@ ; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16 ; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0 -; SINK-AFTER-NEXT: [[TMP1:%.*]] = add i16 [[OFFSET_IDX]], 1 -; SINK-AFTER-NEXT: [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], 2 -; SINK-AFTER-NEXT: [[TMP3:%.*]] = add i16 [[OFFSET_IDX]], 3 ; SINK-AFTER-NEXT: [[TMP4:%.*]] = add <4 x i16> [[VEC_IND]], ; SINK-AFTER-NEXT: [[TMP5:%.*]] = or <4 x i16> [[TMP4]], [[TMP4]] ; SINK-AFTER-NEXT: [[TMP6]] = zext <4 x i16> [[TMP5]] to <4 x i32> diff --git a/llvm/test/Transforms/LoopVectorize/float-induction.ll b/llvm/test/Transforms/LoopVectorize/float-induction.ll --- a/llvm/test/Transforms/LoopVectorize/float-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/float-induction.ll @@ -1490,66 +1490,66 @@ ; VEC4_INTERL2: pred.store.continue: ; VEC4_INTERL2-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP6]], i64 1 ; VEC4_INTERL2-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]] -; VEC4_INTERL2: pred.store.if4: +; VEC4_INTERL2: pred.store.if3: ; VEC4_INTERL2-NEXT: [[TMP11:%.*]] = fadd fast float [[TMP0]], 1.000000e+00 ; VEC4_INTERL2-NEXT: [[TMP12:%.*]] = or i64 [[INDEX]], 1 ; VEC4_INTERL2-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP12]] ; VEC4_INTERL2-NEXT: store float [[TMP11]], float* [[TMP13]], align 4 ; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE5]] -; VEC4_INTERL2: pred.store.continue5: +; VEC4_INTERL2: 
pred.store.continue4: ; VEC4_INTERL2-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP6]], i64 2 ; VEC4_INTERL2-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]] -; VEC4_INTERL2: pred.store.if6: +; VEC4_INTERL2: pred.store.if5: ; VEC4_INTERL2-NEXT: [[TMP15:%.*]] = fadd fast float [[TMP0]], 2.000000e+00 ; VEC4_INTERL2-NEXT: [[TMP16:%.*]] = or i64 [[INDEX]], 2 ; VEC4_INTERL2-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP16]] ; VEC4_INTERL2-NEXT: store float [[TMP15]], float* [[TMP17]], align 4 ; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE7]] -; VEC4_INTERL2: pred.store.continue7: +; VEC4_INTERL2: pred.store.continue6: ; VEC4_INTERL2-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP6]], i64 3 ; VEC4_INTERL2-NEXT: br i1 [[TMP18]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9:%.*]] -; VEC4_INTERL2: pred.store.if8: +; VEC4_INTERL2: pred.store.if7: ; VEC4_INTERL2-NEXT: [[TMP19:%.*]] = fadd fast float [[TMP0]], 3.000000e+00 ; VEC4_INTERL2-NEXT: [[TMP20:%.*]] = or i64 [[INDEX]], 3 ; VEC4_INTERL2-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP20]] ; VEC4_INTERL2-NEXT: store float [[TMP19]], float* [[TMP21]], align 4 ; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE9]] -; VEC4_INTERL2: pred.store.continue9: +; VEC4_INTERL2: pred.store.continue8: ; VEC4_INTERL2-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[TMP7]], i64 0 ; VEC4_INTERL2-NEXT: br i1 [[TMP22]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]] -; VEC4_INTERL2: pred.store.if10: +; VEC4_INTERL2: pred.store.if9: ; VEC4_INTERL2-NEXT: [[TMP23:%.*]] = fadd fast float [[TMP0]], 4.000000e+00 ; VEC4_INTERL2-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP1]] ; VEC4_INTERL2-NEXT: store float [[TMP23]], float* [[TMP24]], align 4 ; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE11]] -; VEC4_INTERL2: pred.store.continue11: +; VEC4_INTERL2: 
pred.store.continue10: ; VEC4_INTERL2-NEXT: [[TMP25:%.*]] = extractelement <4 x i1> [[TMP7]], i64 1 ; VEC4_INTERL2-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]] -; VEC4_INTERL2: pred.store.if12: +; VEC4_INTERL2: pred.store.if11: ; VEC4_INTERL2-NEXT: [[TMP26:%.*]] = fadd fast float [[TMP0]], 5.000000e+00 ; VEC4_INTERL2-NEXT: [[TMP27:%.*]] = or i64 [[INDEX]], 5 ; VEC4_INTERL2-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP27]] ; VEC4_INTERL2-NEXT: store float [[TMP26]], float* [[TMP28]], align 4 ; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE13]] -; VEC4_INTERL2: pred.store.continue13: +; VEC4_INTERL2: pred.store.continue12: ; VEC4_INTERL2-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP7]], i64 2 ; VEC4_INTERL2-NEXT: br i1 [[TMP29]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15:%.*]] -; VEC4_INTERL2: pred.store.if14: +; VEC4_INTERL2: pred.store.if13: ; VEC4_INTERL2-NEXT: [[TMP30:%.*]] = fadd fast float [[TMP0]], 6.000000e+00 ; VEC4_INTERL2-NEXT: [[TMP31:%.*]] = or i64 [[INDEX]], 6 ; VEC4_INTERL2-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP31]] ; VEC4_INTERL2-NEXT: store float [[TMP30]], float* [[TMP32]], align 4 ; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE15]] -; VEC4_INTERL2: pred.store.continue15: +; VEC4_INTERL2: pred.store.continue14: ; VEC4_INTERL2-NEXT: [[TMP33:%.*]] = extractelement <4 x i1> [[TMP7]], i64 3 ; VEC4_INTERL2-NEXT: br i1 [[TMP33]], label [[PRED_STORE_IF16:%.*]], label [[PRED_STORE_CONTINUE17]] -; VEC4_INTERL2: pred.store.if16: +; VEC4_INTERL2: pred.store.if15: ; VEC4_INTERL2-NEXT: [[TMP34:%.*]] = fadd fast float [[TMP0]], 7.000000e+00 ; VEC4_INTERL2-NEXT: [[TMP35:%.*]] = or i64 [[INDEX]], 7 ; VEC4_INTERL2-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP35]] ; VEC4_INTERL2-NEXT: store float [[TMP34]], float* [[TMP36]], align 4 ; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE17]] -; 
VEC4_INTERL2: pred.store.continue17: +; VEC4_INTERL2: pred.store.continue16: ; VEC4_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; VEC4_INTERL2-NEXT: [[TMP37:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VEC4_INTERL2-NEXT: br i1 [[TMP37]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll --- a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll +++ b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll @@ -45,6 +45,7 @@ ; CHECK-NEXT: loop: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<0>, ir<1> ; CHECK-NEXT: EMIT vp<[[COND:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: WIDEN ir<%cond0> = icmp ir<%iv>, ir<13> ; CHECK-NEXT: WIDEN-SELECT ir<%s> = select ir<%cond0>, ir<10>, ir<20> @@ -57,7 +58,7 @@ ; CHECK-NEXT: CondBit: vp<[[COND]]> (loop) ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.if: -; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, ir<%iv> +; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE store ir<%s>, ir<%gep> ; CHECK-NEXT: Successor(s): pred.store.continue ; CHECK-EMPTY: diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -2032,7 +2032,6 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE2:%.*]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UDIV_CONTINUE2]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], 
[[PRED_UDIV_CONTINUE2]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP0]] @@ -2062,7 +2061,6 @@ ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP14]] ; CHECK-NEXT: [[TMP16]] = add <2 x i32> [[PREDPHI]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; CHECK: middle.block: @@ -2191,30 +2189,30 @@ ; UNROLL: pred.udiv.continue: ; UNROLL-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_UDIV_IF]] ] ; UNROLL-NEXT: br i1 [[C]], label [[PRED_UDIV_IF4:%.*]], label [[PRED_UDIV_CONTINUE5:%.*]] -; UNROLL: pred.udiv.if4: +; UNROLL: pred.udiv.if3: ; UNROLL-NEXT: [[TMP10:%.*]] = or i32 [[INDEX]], 1 ; UNROLL-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i64 1 ; UNROLL-NEXT: [[TMP12:%.*]] = udiv i32 [[TMP11]], [[TMP10]] ; UNROLL-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP12]], i64 1 ; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE5]] -; UNROLL: pred.udiv.continue5: +; UNROLL: pred.udiv.continue4: ; UNROLL-NEXT: [[TMP14:%.*]] = phi <2 x i32> [ [[TMP9]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP13]], [[PRED_UDIV_IF4]] ] ; UNROLL-NEXT: br i1 [[C]], label [[PRED_UDIV_IF8:%.*]], label [[PRED_UDIV_CONTINUE9:%.*]] -; UNROLL: pred.udiv.if8: +; UNROLL: pred.udiv.if7: ; UNROLL-NEXT: [[TMP15:%.*]] = extractelement <2 x i32> [[WIDE_LOAD3]], i64 0 ; UNROLL-NEXT: [[TMP16:%.*]] = udiv i32 [[TMP15]], [[TMP0]] ; UNROLL-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> poison, i32 [[TMP16]], i64 0 ; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE9]] -; UNROLL: pred.udiv.continue9: +; UNROLL: pred.udiv.continue8: ; UNROLL-NEXT: [[TMP18:%.*]] = phi <2 x i32> 
[ poison, [[PRED_UDIV_CONTINUE5]] ], [ [[TMP17]], [[PRED_UDIV_IF8]] ] ; UNROLL-NEXT: br i1 [[C]], label [[PRED_UDIV_IF10:%.*]], label [[PRED_UDIV_CONTINUE11]] -; UNROLL: pred.udiv.if10: +; UNROLL: pred.udiv.if9: ; UNROLL-NEXT: [[TMP19:%.*]] = or i32 [[INDEX]], 3 ; UNROLL-NEXT: [[TMP20:%.*]] = extractelement <2 x i32> [[WIDE_LOAD3]], i64 1 ; UNROLL-NEXT: [[TMP21:%.*]] = udiv i32 [[TMP20]], [[TMP19]] ; UNROLL-NEXT: [[TMP22:%.*]] = insertelement <2 x i32> [[TMP18]], i32 [[TMP21]], i64 1 ; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE11]] -; UNROLL: pred.udiv.continue11: +; UNROLL: pred.udiv.continue10: ; UNROLL-NEXT: [[TMP23:%.*]] = phi <2 x i32> [ [[TMP18]], [[PRED_UDIV_CONTINUE9]] ], [ [[TMP22]], [[PRED_UDIV_IF10]] ] ; UNROLL-NEXT: [[TMP24:%.*]] = xor <2 x i1> [[BROADCAST_SPLATINSERT]], ; UNROLL-NEXT: [[TMP25:%.*]] = shufflevector <2 x i1> [[TMP24]], <2 x i1> poison, <2 x i32> zeroinitializer @@ -2271,10 +2269,8 @@ ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE11:%.*]] ] -; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UDIV_CONTINUE11]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP32:%.*]], [[PRED_UDIV_CONTINUE11]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP33:%.*]], [[PRED_UDIV_CONTINUE11]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 2 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP0]] @@ -2296,32 +2292,32 @@ ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP11]], [[PRED_UDIV_IF]] ] ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> 
[[BROADCAST_SPLAT]], i32 1 ; UNROLL-NO-IC-NEXT: br i1 [[TMP13]], label [[PRED_UDIV_IF4:%.*]], label [[PRED_UDIV_CONTINUE5:%.*]] -; UNROLL-NO-IC: pred.udiv.if4: +; UNROLL-NO-IC: pred.udiv.if3: ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = add i32 [[INDEX]], 1 ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1 ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = udiv i32 [[TMP15]], [[TMP14]] ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> [[TMP12]], i32 [[TMP16]], i32 1 ; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE5]] -; UNROLL-NO-IC: pred.udiv.continue5: +; UNROLL-NO-IC: pred.udiv.continue4: ; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = phi <2 x i32> [ [[TMP12]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP17]], [[PRED_UDIV_IF4]] ] ; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT7]], i32 0 ; UNROLL-NO-IC-NEXT: br i1 [[TMP19]], label [[PRED_UDIV_IF8:%.*]], label [[PRED_UDIV_CONTINUE9:%.*]] -; UNROLL-NO-IC: pred.udiv.if8: +; UNROLL-NO-IC: pred.udiv.if7: ; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = extractelement <2 x i32> [[WIDE_LOAD3]], i32 0 ; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = udiv i32 [[TMP20]], [[TMP1]] ; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = insertelement <2 x i32> poison, i32 [[TMP21]], i32 0 ; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE9]] -; UNROLL-NO-IC: pred.udiv.continue9: +; UNROLL-NO-IC: pred.udiv.continue8: ; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = phi <2 x i32> [ poison, [[PRED_UDIV_CONTINUE5]] ], [ [[TMP22]], [[PRED_UDIV_IF8]] ] ; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT7]], i32 1 ; UNROLL-NO-IC-NEXT: br i1 [[TMP24]], label [[PRED_UDIV_IF10:%.*]], label [[PRED_UDIV_CONTINUE11]] -; UNROLL-NO-IC: pred.udiv.if10: +; UNROLL-NO-IC: pred.udiv.if9: ; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = add i32 [[INDEX]], 3 ; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = extractelement <2 x i32> [[WIDE_LOAD3]], i32 1 ; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = udiv i32 [[TMP26]], [[TMP25]] ; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = 
insertelement <2 x i32> [[TMP23]], i32 [[TMP27]], i32 1 ; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE11]] -; UNROLL-NO-IC: pred.udiv.continue11: +; UNROLL-NO-IC: pred.udiv.continue10: ; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = phi <2 x i32> [ [[TMP23]], [[PRED_UDIV_CONTINUE9]] ], [ [[TMP28]], [[PRED_UDIV_IF10]] ] ; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], ; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT7]], @@ -2330,7 +2326,6 @@ ; UNROLL-NO-IC-NEXT: [[TMP32]] = add <2 x i32> [[PREDPHI]], [[VEC_PHI]] ; UNROLL-NO-IC-NEXT: [[TMP33]] = add <2 x i32> [[PREDPHI12]], [[VEC_PHI2]] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], ; UNROLL-NO-IC-NEXT: [[TMP34:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP34]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -2392,66 +2387,66 @@ ; INTERLEAVE: pred.udiv.continue: ; INTERLEAVE-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_UDIV_IF]] ] ; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF4:%.*]], label [[PRED_UDIV_CONTINUE5:%.*]] -; INTERLEAVE: pred.udiv.if4: +; INTERLEAVE: pred.udiv.if3: ; INTERLEAVE-NEXT: [[TMP10:%.*]] = or i32 [[INDEX]], 1 ; INTERLEAVE-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 1 ; INTERLEAVE-NEXT: [[TMP12:%.*]] = udiv i32 [[TMP11]], [[TMP10]] ; INTERLEAVE-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP12]], i64 1 ; INTERLEAVE-NEXT: br label [[PRED_UDIV_CONTINUE5]] -; INTERLEAVE: pred.udiv.continue5: +; INTERLEAVE: pred.udiv.continue4: ; INTERLEAVE-NEXT: [[TMP14:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP13]], [[PRED_UDIV_IF4]] ] ; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF6:%.*]], label [[PRED_UDIV_CONTINUE7:%.*]] -; INTERLEAVE: pred.udiv.if6: +; INTERLEAVE: pred.udiv.if5: ; 
INTERLEAVE-NEXT: [[TMP15:%.*]] = or i32 [[INDEX]], 2 ; INTERLEAVE-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 2 ; INTERLEAVE-NEXT: [[TMP17:%.*]] = udiv i32 [[TMP16]], [[TMP15]] ; INTERLEAVE-NEXT: [[TMP18:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[TMP17]], i64 2 ; INTERLEAVE-NEXT: br label [[PRED_UDIV_CONTINUE7]] -; INTERLEAVE: pred.udiv.continue7: +; INTERLEAVE: pred.udiv.continue6: ; INTERLEAVE-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP14]], [[PRED_UDIV_CONTINUE5]] ], [ [[TMP18]], [[PRED_UDIV_IF6]] ] ; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF8:%.*]], label [[PRED_UDIV_CONTINUE9:%.*]] -; INTERLEAVE: pred.udiv.if8: +; INTERLEAVE: pred.udiv.if7: ; INTERLEAVE-NEXT: [[TMP20:%.*]] = or i32 [[INDEX]], 3 ; INTERLEAVE-NEXT: [[TMP21:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 3 ; INTERLEAVE-NEXT: [[TMP22:%.*]] = udiv i32 [[TMP21]], [[TMP20]] ; INTERLEAVE-NEXT: [[TMP23:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP22]], i64 3 ; INTERLEAVE-NEXT: br label [[PRED_UDIV_CONTINUE9]] -; INTERLEAVE: pred.udiv.continue9: +; INTERLEAVE: pred.udiv.continue8: ; INTERLEAVE-NEXT: [[TMP24:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_UDIV_CONTINUE7]] ], [ [[TMP23]], [[PRED_UDIV_IF8]] ] ; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF12:%.*]], label [[PRED_UDIV_CONTINUE13:%.*]] -; INTERLEAVE: pred.udiv.if12: +; INTERLEAVE: pred.udiv.if11: ; INTERLEAVE-NEXT: [[TMP25:%.*]] = extractelement <4 x i32> [[WIDE_LOAD3]], i64 0 ; INTERLEAVE-NEXT: [[TMP26:%.*]] = udiv i32 [[TMP25]], [[TMP0]] ; INTERLEAVE-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP26]], i64 0 ; INTERLEAVE-NEXT: br label [[PRED_UDIV_CONTINUE13]] -; INTERLEAVE: pred.udiv.continue13: +; INTERLEAVE: pred.udiv.continue12: ; INTERLEAVE-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ poison, [[PRED_UDIV_CONTINUE9]] ], [ [[TMP27]], [[PRED_UDIV_IF12]] ] ; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF14:%.*]], label [[PRED_UDIV_CONTINUE15:%.*]] -; INTERLEAVE: pred.udiv.if14: +; 
INTERLEAVE: pred.udiv.if13: ; INTERLEAVE-NEXT: [[TMP29:%.*]] = or i32 [[INDEX]], 5 ; INTERLEAVE-NEXT: [[TMP30:%.*]] = extractelement <4 x i32> [[WIDE_LOAD3]], i64 1 ; INTERLEAVE-NEXT: [[TMP31:%.*]] = udiv i32 [[TMP30]], [[TMP29]] ; INTERLEAVE-NEXT: [[TMP32:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP31]], i64 1 ; INTERLEAVE-NEXT: br label [[PRED_UDIV_CONTINUE15]] -; INTERLEAVE: pred.udiv.continue15: +; INTERLEAVE: pred.udiv.continue14: ; INTERLEAVE-NEXT: [[TMP33:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_UDIV_CONTINUE13]] ], [ [[TMP32]], [[PRED_UDIV_IF14]] ] ; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF16:%.*]], label [[PRED_UDIV_CONTINUE17:%.*]] -; INTERLEAVE: pred.udiv.if16: +; INTERLEAVE: pred.udiv.if15: ; INTERLEAVE-NEXT: [[TMP34:%.*]] = or i32 [[INDEX]], 6 ; INTERLEAVE-NEXT: [[TMP35:%.*]] = extractelement <4 x i32> [[WIDE_LOAD3]], i64 2 ; INTERLEAVE-NEXT: [[TMP36:%.*]] = udiv i32 [[TMP35]], [[TMP34]] ; INTERLEAVE-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP33]], i32 [[TMP36]], i64 2 ; INTERLEAVE-NEXT: br label [[PRED_UDIV_CONTINUE17]] -; INTERLEAVE: pred.udiv.continue17: +; INTERLEAVE: pred.udiv.continue16: ; INTERLEAVE-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP33]], [[PRED_UDIV_CONTINUE15]] ], [ [[TMP37]], [[PRED_UDIV_IF16]] ] ; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF18:%.*]], label [[PRED_UDIV_CONTINUE19]] -; INTERLEAVE: pred.udiv.if18: +; INTERLEAVE: pred.udiv.if17: ; INTERLEAVE-NEXT: [[TMP39:%.*]] = or i32 [[INDEX]], 7 ; INTERLEAVE-NEXT: [[TMP40:%.*]] = extractelement <4 x i32> [[WIDE_LOAD3]], i64 3 ; INTERLEAVE-NEXT: [[TMP41:%.*]] = udiv i32 [[TMP40]], [[TMP39]] ; INTERLEAVE-NEXT: [[TMP42:%.*]] = insertelement <4 x i32> [[TMP38]], i32 [[TMP41]], i64 3 ; INTERLEAVE-NEXT: br label [[PRED_UDIV_CONTINUE19]] -; INTERLEAVE: pred.udiv.continue19: +; INTERLEAVE: pred.udiv.continue18: ; INTERLEAVE-NEXT: [[TMP43:%.*]] = phi <4 x i32> [ [[TMP38]], [[PRED_UDIV_CONTINUE17]] ], [ [[TMP42]], [[PRED_UDIV_IF18]] ] ; INTERLEAVE-NEXT: 
[[TMP44:%.*]] = xor <4 x i1> [[BROADCAST_SPLATINSERT]], ; INTERLEAVE-NEXT: [[TMP45:%.*]] = shufflevector <4 x i1> [[TMP44]], <4 x i1> poison, <4 x i32> zeroinitializer @@ -2544,7 +2539,6 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1 @@ -2557,7 +2551,6 @@ ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i16> [[TMP6]], i32 1 ; CHECK-NEXT: store i16 [[TMP10]], i16* [[TMP8]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <2 x i32> [[VEC_IND1]], ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] @@ -2707,14 +2700,12 @@ ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND2:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], +; UNROLL-NO-IC-NEXT: [[STEP_ADD3:%.*]] = add <2 x i32> [[VEC_IND2]], ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 2 ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 3 -; 
UNROLL-NO-IC-NEXT: [[STEP_ADD3:%.*]] = add <2 x i32> [[VEC_IND2]], ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], [[VEC_IND2]] ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add <2 x i32> [[BROADCAST_SPLAT7]], [[STEP_ADD3]] ; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = trunc <2 x i32> [[TMP7]] to <2 x i16> @@ -2732,7 +2723,6 @@ ; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = extractelement <2 x i16> [[TMP10]], i32 1 ; UNROLL-NO-IC-NEXT: store i16 [[TMP18]], i16* [[TMP14]], align 2 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT5]] = add <2 x i32> [[STEP_ADD3]], ; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] @@ -3565,16 +3555,12 @@ ; CHECK-NEXT: [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8 ; CHECK-NEXT: [[IND_END:%.*]] = add i8 [[T]], [[CAST_CRD]] ; CHECK-NEXT: [[IND_END3:%.*]] = add i32 [[EXT]], [[N_VEC]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i8> poison, i8 [[T]], i32 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT]], <2 x i8> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i8> [[DOTSPLAT]], ; CHECK-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <2 x i32> poison, i32 [[EXT]], i32 0 ; CHECK-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT4]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION6:%.*]] = add <2 x i32> [[DOTSPLAT5]], ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i8> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND7:%.*]] = phi <2 x i32> [ [[INDUCTION6]], [[VECTOR_PH]] ], [ 
[[VEC_IND_NEXT8:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP12:%.*]] = trunc i32 [[INDEX]] to i8 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[T]], [[TMP12]] @@ -3584,7 +3570,6 @@ ; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <2 x i32>* ; CHECK-NEXT: store <2 x i32> [[VEC_IND7]], <2 x i32>* [[TMP16]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i8> [[VEC_IND]], ; CHECK-NEXT: [[VEC_IND_NEXT8]] = add <2 x i32> [[VEC_IND7]], ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] @@ -3715,9 +3700,9 @@ ; UNROLL: vector.body: ; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NEXT: [[VEC_IND8:%.*]] = phi <2 x i32> [ [[INDUCTION7]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NEXT: [[STEP_ADD9:%.*]] = add <2 x i32> [[VEC_IND8]], ; UNROLL-NEXT: [[TMP12:%.*]] = trunc i32 [[INDEX]] to i8 ; UNROLL-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[TMP12]], [[T]] -; UNROLL-NEXT: [[STEP_ADD9:%.*]] = add <2 x i32> [[VEC_IND8]], ; UNROLL-NEXT: [[TMP13:%.*]] = sext i8 [[OFFSET_IDX]] to i64 ; UNROLL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP13]] ; UNROLL-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <2 x i32>* @@ -3783,23 +3768,18 @@ ; UNROLL-NO-IC-NEXT: [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8 ; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = add i8 [[T]], [[CAST_CRD]] ; UNROLL-NO-IC-NEXT: [[IND_END3:%.*]] = add i32 [[EXT]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i8> poison, i8 [[T]], i32 0 -; UNROLL-NO-IC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT]], <2 x i8> poison, <2 x i32> zeroinitializer -; UNROLL-NO-IC-NEXT: [[INDUCTION:%.*]] = add <2 x i8> [[DOTSPLAT]], ; UNROLL-NO-IC-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <2 x i32> 
poison, i32 [[EXT]], i32 0 ; UNROLL-NO-IC-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT5]], <2 x i32> poison, <2 x i32> zeroinitializer ; UNROLL-NO-IC-NEXT: [[INDUCTION7:%.*]] = add <2 x i32> [[DOTSPLAT6]], ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i8> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND8:%.*]] = phi <2 x i32> [ [[INDUCTION7]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i8> [[VEC_IND]], +; UNROLL-NO-IC-NEXT: [[STEP_ADD9:%.*]] = add <2 x i32> [[VEC_IND8]], ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = trunc i32 [[INDEX]] to i8 ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[T]], [[TMP12]] ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = add i8 [[OFFSET_IDX]], 0 ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = add i8 [[OFFSET_IDX]], 2 -; UNROLL-NO-IC-NEXT: [[STEP_ADD9:%.*]] = add <2 x i32> [[VEC_IND8]], ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i8 [[TMP13]] ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i8 [[TMP14]] ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP15]], i32 0 @@ -3809,7 +3789,6 @@ ; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = bitcast i32* [[TMP19]] to <2 x i32>* ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD9]], <2 x i32>* [[TMP20]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i8> [[STEP_ADD]], ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT11]] = add <2 x i32> [[STEP_ADD9]], ; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] @@ -3871,9 +3850,9 
@@ ; INTERLEAVE: vector.body: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[VEC_IND8:%.*]] = phi <4 x i32> [ [[INDUCTION7]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], [[VECTOR_BODY]] ] +; INTERLEAVE-NEXT: [[STEP_ADD9:%.*]] = add <4 x i32> [[VEC_IND8]], ; INTERLEAVE-NEXT: [[TMP12:%.*]] = trunc i32 [[INDEX]] to i8 ; INTERLEAVE-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[TMP12]], [[T]] -; INTERLEAVE-NEXT: [[STEP_ADD9:%.*]] = add <4 x i32> [[VEC_IND8]], ; INTERLEAVE-NEXT: [[TMP13:%.*]] = sext i8 [[OFFSET_IDX]] to i64 ; INTERLEAVE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP13]] ; INTERLEAVE-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <4 x i32>* @@ -3972,16 +3951,12 @@ ; CHECK-NEXT: [[IND_END:%.*]] = add i8 [[T]], [[CAST_CRD]] ; CHECK-NEXT: [[TMP12:%.*]] = mul i32 [[N_VEC]], 4 ; CHECK-NEXT: [[IND_END2:%.*]] = add i32 [[EXT_MUL]], [[TMP12]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i8> poison, i8 [[T]], i32 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT]], <2 x i8> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i8> [[DOTSPLAT]], ; CHECK-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <2 x i32> poison, i32 [[EXT_MUL]], i32 0 ; CHECK-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT4]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION6:%.*]] = add <2 x i32> [[DOTSPLAT5]], ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i8> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND7:%.*]] = phi <2 x i32> [ [[INDUCTION6]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT8:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP13:%.*]] = trunc i32 [[INDEX]] to i8 ; CHECK-NEXT: 
[[OFFSET_IDX:%.*]] = add i8 [[T]], [[TMP13]] @@ -3991,7 +3966,6 @@ ; CHECK-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <2 x i32>* ; CHECK-NEXT: store <2 x i32> [[VEC_IND7]], <2 x i32>* [[TMP17]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i8> [[VEC_IND]], ; CHECK-NEXT: [[VEC_IND_NEXT8]] = add <2 x i32> [[VEC_IND7]], ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] @@ -4128,9 +4102,9 @@ ; UNROLL: vector.body: ; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NEXT: [[VEC_IND8:%.*]] = phi <2 x i32> [ [[INDUCTION7]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NEXT: [[STEP_ADD9:%.*]] = add <2 x i32> [[VEC_IND8]], ; UNROLL-NEXT: [[TMP13:%.*]] = trunc i32 [[INDEX]] to i8 ; UNROLL-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[TMP13]], [[T]] -; UNROLL-NEXT: [[STEP_ADD9:%.*]] = add <2 x i32> [[VEC_IND8]], ; UNROLL-NEXT: [[TMP14:%.*]] = sext i8 [[OFFSET_IDX]] to i64 ; UNROLL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP14]] ; UNROLL-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <2 x i32>* @@ -4199,23 +4173,18 @@ ; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = add i8 [[T]], [[CAST_CRD]] ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = mul i32 [[N_VEC]], 4 ; UNROLL-NO-IC-NEXT: [[IND_END2:%.*]] = add i32 [[EXT_MUL]], [[TMP12]] -; UNROLL-NO-IC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i8> poison, i8 [[T]], i32 0 -; UNROLL-NO-IC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT]], <2 x i8> poison, <2 x i32> zeroinitializer -; UNROLL-NO-IC-NEXT: [[INDUCTION:%.*]] = add <2 x i8> [[DOTSPLAT]], ; UNROLL-NO-IC-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <2 x i32> poison, i32 [[EXT_MUL]], i32 0 ; UNROLL-NO-IC-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT5]], 
<2 x i32> poison, <2 x i32> zeroinitializer ; UNROLL-NO-IC-NEXT: [[INDUCTION7:%.*]] = add <2 x i32> [[DOTSPLAT6]], ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i8> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND8:%.*]] = phi <2 x i32> [ [[INDUCTION7]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i8> [[VEC_IND]], +; UNROLL-NO-IC-NEXT: [[STEP_ADD9:%.*]] = add <2 x i32> [[VEC_IND8]], ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = trunc i32 [[INDEX]] to i8 ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[T]], [[TMP13]] ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = add i8 [[OFFSET_IDX]], 0 ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = add i8 [[OFFSET_IDX]], 2 -; UNROLL-NO-IC-NEXT: [[STEP_ADD9:%.*]] = add <2 x i32> [[VEC_IND8]], ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i8 [[TMP14]] ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[A]], i8 [[TMP15]] ; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP16]], i32 0 @@ -4225,7 +4194,6 @@ ; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP20]] to <2 x i32>* ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD9]], <2 x i32>* [[TMP21]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i8> [[STEP_ADD]], ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT11]] = add <2 x i32> [[STEP_ADD9]], ; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] @@ -4290,9 +4258,9 @@ ; INTERLEAVE: vector.body: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ 
[[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[VEC_IND8:%.*]] = phi <4 x i32> [ [[INDUCTION7]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], [[VECTOR_BODY]] ] +; INTERLEAVE-NEXT: [[STEP_ADD9:%.*]] = add <4 x i32> [[VEC_IND8]], ; INTERLEAVE-NEXT: [[TMP13:%.*]] = trunc i32 [[INDEX]] to i8 ; INTERLEAVE-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[TMP13]], [[T]] -; INTERLEAVE-NEXT: [[STEP_ADD9:%.*]] = add <4 x i32> [[VEC_IND8]], ; INTERLEAVE-NEXT: [[TMP14:%.*]] = sext i8 [[OFFSET_IDX]] to i64 ; INTERLEAVE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP14]] ; INTERLEAVE-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <4 x i32>* @@ -4375,7 +4343,6 @@ ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <2 x i32>* @@ -4487,9 +4454,7 @@ ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 -; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2 -; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3 ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP0]] ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP2]] ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0 @@ -4593,7 +4558,6 @@ ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , 
[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[INDEX]] to i32 ; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], 0 -; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP5]], 1 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <2 x i32>* @@ -4728,9 +4692,7 @@ ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = trunc i64 [[INDEX]] to i32 ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], 0 -; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i32 [[TMP5]], 1 ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add i32 [[TMP5]], 2 -; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = add i32 [[TMP5]], 3 ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP6]] ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP8]] ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 0 @@ -4843,7 +4805,6 @@ ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[I]], [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP1]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <2 x i32>* @@ -4973,9 +4934,7 @@ ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[I]], [[INDEX]] ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 0 -; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 1 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], 2 -; 
UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], 3 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP1]] ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP3]] ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0 @@ -6172,7 +6131,6 @@ ; CHECK-NEXT: [[VEC_IND]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDEX]] to i32 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND]], <2 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[SRC:%.*]], align 4 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP4]], i32 0 @@ -6301,9 +6259,7 @@ ; UNROLL-NO-IC-NEXT: [[STEP_ADD]] = add <2 x i32> [[VEC_IND]], ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = trunc i64 [[INDEX]] to i32 ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 0 -; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], 1 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], 2 -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i32 [[TMP0]], 3 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND]], <2 x i32> ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[VEC_IND]], <2 x i32> [[STEP_ADD]], <2 x i32> ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = load i32, i32* [[SRC:%.*]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll b/llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll --- a/llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll @@ -141,9 +141,6 @@ ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; 
CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[SRC:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>* diff --git a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll --- a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll +++ b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll @@ -207,7 +207,7 @@ ; CHECK-NEXT: [[INDEX:%.+]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.+]], %vector.body ] ; VEC-NEXT: [[VEC_IND:%.+]] = phi <2 x i64> [ , %vector.ph ], [ [[VEC_IND_NEXT:%.+]], %vector.body ] ; CHECK-NEXT: [[IV_0:%.+]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[IV_1:%.+]] = add i64 [[INDEX]], 1 +; VEC-NOT: add i64 [[INDEX]], 1 ; CHECK-NOT: [[IV_2_0:%.+]] = add i32 %offset.idx, 0 ; CHECK-LABEL: scalar.ph: ; CHECK-NEXT: {{.+}} = phi i64 [ 1002, %middle.block ], [ 0, %entry ] diff --git a/llvm/test/Transforms/LoopVectorize/loop-form.ll b/llvm/test/Transforms/LoopVectorize/loop-form.ll --- a/llvm/test/Transforms/LoopVectorize/loop-form.ll +++ b/llvm/test/Transforms/LoopVectorize/loop-form.ll @@ -59,8 +59,6 @@ ; TAILFOLD: vector.body: ; TAILFOLD-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] ; TAILFOLD-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ] -; TAILFOLD-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 0 -; TAILFOLD-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 1 ; TAILFOLD-NEXT: [[TMP3:%.*]] = icmp ule <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; TAILFOLD-NEXT: 
[[TMP4:%.*]] = sext <2 x i32> [[VEC_IND]] to <2 x i64> ; TAILFOLD-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 @@ -130,7 +128,6 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP3]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]] @@ -138,7 +135,6 @@ ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>* ; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -266,7 +262,6 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP3]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]] @@ -274,7 +269,6 @@ ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>* ; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label 
[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: @@ -350,7 +344,6 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP3]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]] @@ -358,7 +351,6 @@ ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>* ; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: @@ -436,7 +428,6 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP3]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]] @@ -444,7 +435,6 @@ ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>* ; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: @@ 
-523,7 +513,6 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP3]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]] @@ -531,7 +520,6 @@ ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>* ; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: @@ -614,7 +602,6 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP3]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]] @@ -622,7 +609,6 @@ ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>* ; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: @@ -711,7 +697,6 @@ ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, 
[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP3]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0 @@ -1040,7 +1025,6 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, float* [[ADDR:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, float* [[TMP1]], i32 0 @@ -1064,7 +1048,6 @@ ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] ; CHECK: pred.store.continue2: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: @@ -1145,7 +1128,6 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[TMP0]] @@ -1154,7 +1136,6 @@ ; 
CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP4]], align 4 ; CHECK-NEXT: [[TMP5]] = add <2 x i32> [[VEC_PHI]], [[WIDE_LOAD]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/pr39417-optsize-scevchecks.ll b/llvm/test/Transforms/LoopVectorize/pr39417-optsize-scevchecks.ll --- a/llvm/test/Transforms/LoopVectorize/pr39417-optsize-scevchecks.ll +++ b/llvm/test/Transforms/LoopVectorize/pr39417-optsize-scevchecks.ll @@ -41,9 +41,6 @@ ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3 ; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[TMP5]] diff --git a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll --- a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll @@ -19,44 +19,42 @@ ; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv2i64() +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: 
[[TMP6:%.*]] = shl i64 [[TMP5]], 1 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP6]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv2i64() -; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement poison, i64 [[INDEX]], i64 0 -; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector [[DOTSPLATINSERT2]], poison, zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = add [[DOTSPLAT3]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 1 -; CHECK-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement poison, i64 [[TMP7]], i64 0 -; CHECK-NEXT: [[DOTSPLAT5:%.*]] = shufflevector [[DOTSPLATINSERT4]], poison, zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = add [[DOTSPLAT5]], [[TMP4]] -; CHECK-NEXT: [[TMP9:%.*]] = add [[DOTSPLAT3]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, i64* [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64* [[TMP10]] to * -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , * [[TMP11]], align 8 -; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP13:%.*]] = shl i32 [[TMP12]], 1 -; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP13]] to i64 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, i64* [[TMP10]], i64 [[TMP14]] -; CHECK-NEXT: [[TMP16:%.*]] = bitcast i64* [[TMP15]] to * -; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load , * [[TMP16]], align 8 -; CHECK-NEXT: [[TMP17:%.*]] = add nsw [[WIDE_LOAD]], [[TMP5]] -; CHECK-NEXT: [[TMP18:%.*]] = add nsw [[WIDE_LOAD6]], [[TMP9]] -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP20:%.*]] = bitcast i64* [[TMP19]] to * -; CHECK-NEXT: store [[TMP17]], * [[TMP20]], 
align 8 -; CHECK-NEXT: [[TMP21:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP22:%.*]] = shl i32 [[TMP21]], 1 -; CHECK-NEXT: [[TMP23:%.*]] = sext i32 [[TMP22]] to i64 -; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, i64* [[TMP19]], i64 [[TMP23]] -; CHECK-NEXT: [[TMP25:%.*]] = bitcast i64* [[TMP24]] to * -; CHECK-NEXT: store [[TMP18]], * [[TMP25]], align 8 -; CHECK-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP27:%.*]] = shl i64 [[TMP26]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP27]] -; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP4]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, i64* [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64* [[TMP7]] to * +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , * [[TMP8]], align 8 +; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP10:%.*]] = shl i32 [[TMP9]], 1 +; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[TMP10]] to i64 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, i64* [[TMP7]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i64* [[TMP12]] to * +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load , * [[TMP13]], align 8 +; CHECK-NEXT: [[TMP14:%.*]] = add nsw [[WIDE_LOAD]], [[VEC_IND]] +; CHECK-NEXT: [[TMP15:%.*]] = add nsw [[WIDE_LOAD2]], [[STEP_ADD]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP17:%.*]] = bitcast i64* [[TMP16]] to * +; CHECK-NEXT: store [[TMP14]], * [[TMP17]], align 8 +; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP19:%.*]] = shl i32 [[TMP18]], 1 +; CHECK-NEXT: [[TMP20:%.*]] = sext i32 [[TMP19]] to i64 +; 
CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, i64* [[TMP16]], i64 [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = bitcast i64* [[TMP21]] to * +; CHECK-NEXT: store [[TMP15]], * [[TMP22]], align 8 +; CHECK-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP24:%.*]] = shl i64 [[TMP23]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP24]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[STEP_ADD]], [[DOTSPLAT]] +; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -66,8 +64,8 @@ ; CHECK: for.body: ; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[B]], i64 [[I_08]] -; CHECK-NEXT: [[TMP29:%.*]] = load i64, i64* [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP29]], [[I_08]] +; CHECK-NEXT: [[TMP26:%.*]] = load i64, i64* [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP26]], [[I_08]] ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[I_08]] ; CHECK-NEXT: store i64 [[ADD]], i64* [[ARRAYIDX1]], align 8 ; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1 @@ -109,41 +107,39 @@ ; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv1i64() +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; 
CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv1i64() -; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement poison, i64 [[INDEX]], i64 0 -; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector [[DOTSPLATINSERT2]], poison, zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = add [[DOTSPLAT3]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement poison, i64 [[TMP6]], i64 0 -; CHECK-NEXT: [[DOTSPLAT5:%.*]] = shufflevector [[DOTSPLATINSERT4]], poison, zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = add [[DOTSPLAT5]], [[TMP4]] -; CHECK-NEXT: [[TMP8:%.*]] = add [[DOTSPLAT3]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, i64* [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64* [[TMP9]] to * -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , * [[TMP10]], align 8 -; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i64 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, i64* [[TMP9]], i64 [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64* [[TMP13]] to * -; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load , * [[TMP14]], align 8 -; CHECK-NEXT: [[TMP15:%.*]] = add nsw [[WIDE_LOAD]], [[TMP5]] -; CHECK-NEXT: [[TMP16:%.*]] = add nsw [[WIDE_LOAD6]], [[TMP8]] -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP18:%.*]] = bitcast i64* [[TMP17]] to * -; CHECK-NEXT: store [[TMP15]], * [[TMP18]], align 8 -; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP20:%.*]] = sext i32 [[TMP19]] to i64 -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, i64* [[TMP17]], i64 [[TMP20]] -; CHECK-NEXT: [[TMP22:%.*]] = bitcast i64* [[TMP21]] to * -; CHECK-NEXT: store [[TMP16]], * [[TMP22]], align 8 -; CHECK-NEXT: [[TMP23:%.*]] = call 
i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP24:%.*]] = shl i64 [[TMP23]], 1 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP24]] -; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP4]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, i64* [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64* [[TMP6]] to * +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , * [[TMP7]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[TMP8]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, i64* [[TMP6]], i64 [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64* [[TMP10]] to * +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load , * [[TMP11]], align 8 +; CHECK-NEXT: [[TMP12:%.*]] = add nsw [[WIDE_LOAD]], [[VEC_IND]] +; CHECK-NEXT: [[TMP13:%.*]] = add nsw [[WIDE_LOAD2]], [[STEP_ADD]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i64* [[TMP14]] to * +; CHECK-NEXT: store [[TMP12]], * [[TMP15]], align 8 +; CHECK-NEXT: [[TMP16:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP17:%.*]] = sext i32 [[TMP16]] to i64 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, i64* [[TMP14]], i64 [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = bitcast i64* [[TMP18]] to * +; CHECK-NEXT: store [[TMP13]], * [[TMP19]], align 8 +; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP21:%.*]] = shl i64 [[TMP20]], 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP21]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[STEP_ADD]], [[DOTSPLAT]] +; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: 
br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -153,8 +149,8 @@ ; CHECK: for.body: ; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[B]], i64 [[I_08]] -; CHECK-NEXT: [[TMP26:%.*]] = load i64, i64* [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP26]], [[I_08]] +; CHECK-NEXT: [[TMP23:%.*]] = load i64, i64* [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP23]], [[I_08]] ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[I_08]] ; CHECK-NEXT: store i64 [[ADD]], i64* [[ARRAYIDX1]], align 8 ; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll --- a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll +++ b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll @@ -17,10 +17,9 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDEX]] to i16 ; CHECK-NEXT: [[TMP3:%.*]] = add i16 [[TMP2]], 0 +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [32 x i16], [32 x i16]* @src, i16 0, i16 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16* [[TMP5]] to <2 x i16>* @@ -107,10 +106,9 @@ ; CHECK: 
vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDEX]] to i16 ; CHECK-NEXT: [[TMP3:%.*]] = add i16 [[TMP2]], 0 +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr [32 x i16], [32 x i16]* @src, i16 0, i16 [[TMP3]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i16, i16* [[TMP5]], i32 0 @@ -180,7 +178,6 @@ ; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <2 x i16> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND3:%.*]] = phi <2 x i16> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i1> [[TMP2]], ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i16> [[VEC_IND3]], <2 x i16> [[VEC_IND1]] @@ -248,7 +245,6 @@ ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE4]] ] ; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <2 x i16> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[PRED_LOAD_CONTINUE4]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 ; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll 
b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll --- a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll @@ -101,10 +101,6 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %vector.ph ], [ [[VEC_IND_NEXT:%.*]], %vector.body ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[MASK1]], ; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[MASK1]], <4 x i1> [[TMP4]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[MASK1]], <4 x i1> [[MASK1]], <4 x i1> zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -55,12 +55,13 @@ ; CHECK-NEXT: for.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi %iv.next, 0 +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<0>, ir<1> ; CHECK-NEXT: WIDEN-GEP Inv[Var] ir<%arrayidx> = getelementptr ir<%y>, ir<%iv> ; CHECK-NEXT: WIDEN ir<%lv> = load ir<%arrayidx> ; CHECK-NEXT: WIDEN ir<%cmp> = icmp ir<%arrayidx>, ir<%z> ; CHECK-NEXT: WIDEN-SELECT ir<%sel> = select ir<%cmp>, ir<1.000000e+01>, ir<2.000000e+01> ; CHECK-NEXT: WIDEN ir<%add> = fadd ir<%lv>, ir<%sel> -; CHECK-NEXT: CLONE ir<%arrayidx2> = getelementptr ir<%x>, ir<%iv> +; CHECK-NEXT: CLONE ir<%arrayidx2> = getelementptr ir<%x>, vp<[[STEPS]]> ; CHECK-NEXT: WIDEN store ir<%arrayidx2>, ir<%add> ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF +(nuw) vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> 
vp<[[VEC_TC]]> @@ -136,6 +137,7 @@ ; CHECK-NEXT: for.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %i = phi 0, %i.next +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<0>, ir<1> ; CHECK-NEXT: WIDEN ir<%cmp> = icmp ir<%i>, ir<5> ; CHECK-NEXT: Successor(s): if.then ; CHECK-EMPTY: @@ -149,7 +151,7 @@ ; CHECK-NEXT: CondBit: ir<%cmp> ; CHECK-EMPTY: ; CHECK-NEXT: pred.udiv.if: -; CHECK-NEXT: REPLICATE ir<%tmp4> = udiv ir<%n>, ir<%i> (S->V) +; CHECK-NEXT: REPLICATE ir<%tmp4> = udiv ir<%n>, vp<[[STEPS]]> (S->V) ; CHECK-NEXT: Successor(s): pred.udiv.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.udiv.continue: @@ -164,7 +166,7 @@ ; CHECK-NEXT: for.inc: ; CHECK-NEXT: EMIT vp<[[NOT:%.+]]> = not ir<%cmp> ; CHECK-NEXT: BLEND %d = ir<0>/vp<[[NOT]]> vp<[[PRED]]>/ir<%cmp> -; CHECK-NEXT: CLONE ir<%idx> = getelementptr ir<%x>, ir<%i> +; CHECK-NEXT: CLONE ir<%idx> = getelementptr ir<%x>, vp<[[STEPS]]> ; CHECK-NEXT: WIDEN store ir<%idx>, ir<%d> ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF +(nuw) vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]> diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -19,6 +19,7 @@ ; CHECK-NEXT: loop: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<0>, ir<1> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: Successor(s): loop.0 @@ -32,11 +33,11 @@ ; CHECK-NEXT: CondBit: vp<[[MASK]]> (loop) ; CHECK: pred.store.if: -; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, ir<%iv> +; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, 
ir<0>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b> ; CHECK-NEXT: REPLICATE ir<%add> = add ir<%lv.b>, ir<10> ; CHECK-NEXT: REPLICATE ir<%mul> = mul ir<2>, ir<%add> -; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, ir<%iv> +; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE store ir<%mul>, ir<%gep.a> ; CHECK-NEXT: Successor(s): pred.store.continue @@ -46,8 +47,8 @@ ; CHECK-NEXT: } ; CHECK: loop.1: -; CHECK-NEXT: CLONE ir<%large> = icmp ir<%iv>, ir<8> -; CHECK-NEXT: CLONE ir<%exitcond> = icmp ir<%iv>, ir<%k> +; CHECK-NEXT: CLONE ir<%large> = icmp vp<[[STEPS]]>, ir<8> +; CHECK-NEXT: CLONE ir<%exitcond> = icmp vp<[[STEPS]]>, ir<%k> ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]> ; CHECK-NEXT: No successors @@ -85,6 +86,7 @@ ; CHECK-NEXT: loop: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<0>, ir<1> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: Successor(s): pred.load @@ -95,7 +97,7 @@ ; CHECK-NEXT: CondBit: vp<[[MASK]]> (loop) ; CHECK: pred.load.if: -; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, ir<%iv> +; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b> ; CHECK-NEXT: Successor(s): pred.load.continue @@ -125,8 +127,8 @@ ; CHECK-NEXT: } ; CHECK: loop.1: -; CHECK-NEXT: CLONE ir<%large> = icmp ir<%iv>, ir<8> -; CHECK-NEXT: CLONE ir<%exitcond> = icmp ir<%iv>, ir<%k> +; CHECK-NEXT: CLONE ir<%large> = icmp vp<[[STEPS]]>, ir<8> +; CHECK-NEXT: CLONE ir<%exitcond> = icmp vp<[[STEPS]]>, ir<%k> ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]> ; 
CHECK-NEXT: No successors @@ -164,6 +166,7 @@ ; CHECK-NEXT: loop: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<0>, ir<1> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: Successor(s): pred.load @@ -174,7 +177,7 @@ ; CHECK-NEXT: CondBit: vp<[[MASK]]> (loop) ; CHECK: pred.load.if: -; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, ir<%iv> +; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b> (S->V) ; CHECK-NEXT: Successor(s): pred.load.continue @@ -204,8 +207,8 @@ ; CHECK-NEXT: } ; CHECK: loop.1: -; CHECK-NEXT: CLONE ir<%large> = icmp ir<%iv>, ir<8> -; CHECK-NEXT: CLONE ir<%exitcond> = icmp ir<%iv>, ir<%k> +; CHECK-NEXT: CLONE ir<%large> = icmp vp<[[STEPS]]>, ir<8> +; CHECK-NEXT: CLONE ir<%exitcond> = icmp vp<[[STEPS]]>, ir<%k> ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]> ; CHECK-NEXT: No successors @@ -245,6 +248,7 @@ ; CHECK-NEXT: loop: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 21, %iv.next +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<21>, ir<1> ; CHECK-NEXT: EMIT vp<[[WIDE_CAN_IV:%.+]]> = WIDEN-CANONICAL-INDUCTION vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule vp<[[WIDE_CAN_IV]]> vp<[[BTC]]> ; CHECK-NEXT: CLONE ir<%gep.A.uniform> = getelementptr ir<%A>, ir<0> @@ -282,7 +286,7 @@ ; CHECK-NEXT: CondBit: vp<[[MASK2]]> (loop.then) ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.if: -; CHECK-NEXT: REPLICATE ir<%gep.B> = getelementptr ir<%B>, ir<%iv> +; CHECK-NEXT: REPLICATE ir<%gep.B> = getelementptr ir<%B>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE store vp<[[PRED]]>, ir<%gep.B> ; CHECK-NEXT: Successor(s): pred.store.continue ; 
CHECK-EMPTY: @@ -335,6 +339,7 @@ ; CHECK-NEXT: loop: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<0>, ir<1> ; CHECK-NEXT: EMIT vp<[[MASK1:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: WIDEN ir<%c.1> = icmp ir<%iv>, ir<%j> ; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%iv>, ir<10> @@ -351,7 +356,7 @@ ; CHECK-NEXT: CondBit: vp<[[MASK2]]> (then.0) ; CHECK-EMPTY: ; CHECK-NEXT: pred.load.if: -; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, ir<%iv> +; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b> (S->V) ; CHECK-NEXT: Successor(s): pred.load.continue ; CHECK-EMPTY: @@ -388,8 +393,8 @@ ; CHECK-NEXT: Successor(s): next.0.0 ; CHECK-EMPTY: ; CHECK-NEXT: next.0.0: -; CHECK-NEXT: CLONE ir<%large> = icmp ir<%iv>, ir<8> -; CHECK-NEXT: CLONE ir<%exitcond> = icmp ir<%iv>, ir<%k> +; CHECK-NEXT: CLONE ir<%large> = icmp vp<[[STEPS]]>, ir<8> +; CHECK-NEXT: CLONE ir<%exitcond> = icmp vp<[[STEPS]]>, ir<%k> ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]> ; CHECK-NEXT: No successors @@ -436,6 +441,7 @@ ; CHECK-NEXT: loop: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<0>, ir<1> ; CHECK-NEXT: EMIT vp<[[MASK1:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%iv>, ir<10> ; CHECK-NEXT: WIDEN ir<%c.0> = icmp ir<%iv>, ir<%j> @@ -453,7 +459,7 @@ ; CHECK-NEXT: CondBit: vp<[[MASK2]]> (then.0) ; CHECK-EMPTY: ; CHECK-NEXT: pred.load.if: -; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, ir<%iv> +; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE ir<%lv.b> = 
load ir<%gep.b> (S->V) ; CHECK-NEXT: Successor(s): pred.load.continue ; CHECK-EMPTY: @@ -497,8 +503,8 @@ ; CHECK-NEXT: Successor(s): next.1 ; CHECK-EMPTY: ; CHECK-NEXT: next.1: -; CHECK-NEXT: CLONE ir<%large> = icmp ir<%iv>, ir<8> -; CHECK-NEXT: CLONE ir<%exitcond> = icmp ir<%iv>, ir<%k> +; CHECK-NEXT: CLONE ir<%large> = icmp vp<[[STEPS]]>, ir<8> +; CHECK-NEXT: CLONE ir<%exitcond> = icmp vp<[[STEPS]]>, ir<%k> ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]> ; CHECK-NEXT: No successors @@ -552,6 +558,7 @@ ; CHECK-NEXT: loop: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<0>, ir<1> ; CHECK-NEXT: EMIT vp<[[MASK1:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%iv>, ir<10> ; CHECK-NEXT: WIDEN ir<%c.0> = icmp ir<%iv>, ir<%j> @@ -568,7 +575,7 @@ ; CHECK-NEXT: CondBit: vp<[[MASK2]]> (then.0) ; CHECK-EMPTY: ; CHECK-NEXT: pred.load.if: -; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, ir<%iv> +; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b> ; CHECK-NEXT: Successor(s): pred.load.continue ; CHECK-EMPTY: @@ -611,8 +618,8 @@ ; CHECK-NEXT: Successor(s): next.1 ; CHECK-EMPTY: ; CHECK-NEXT: next.1: -; CHECK-NEXT: CLONE ir<%large> = icmp ir<%iv>, ir<8> -; CHECK-NEXT: CLONE ir<%exitcond> = icmp ir<%iv>, ir<%k> +; CHECK-NEXT: CLONE ir<%large> = icmp vp<[[STEPS]]>, ir<8> +; CHECK-NEXT: CLONE ir<%exitcond> = icmp vp<[[STEPS]]>, ir<%k> ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]> ; CHECK-NEXT: No successors @@ -662,8 +669,9 @@ ; CHECK-NEXT: loop: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, 
%iv.next +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<0>, ir<1> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> -; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, ir<%iv> +; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, vp<[[STEPS]]> ; CHECK-NEXT: Successor(s): loop.0 ; CHECK-EMPTY: ; CHECK-NEXT: loop.0: @@ -683,9 +691,9 @@ ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.if: ; CHECK-NEXT: REPLICATE ir<%lv.a> = load ir<%gep.a> -; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, ir<%iv> +; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b> -; CHECK-NEXT: REPLICATE ir<%gep.c> = getelementptr ir<@c>, ir<0>, ir<%iv> +; CHECK-NEXT: REPLICATE ir<%gep.c> = getelementptr ir<@c>, ir<0>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE store ir<%lv.a>, ir<%gep.c> ; CHECK-NEXT: REPLICATE store ir<%lv.b>, ir<%gep.a> ; CHECK-NEXT: Successor(s): pred.store.continue @@ -713,7 +721,7 @@ ; CHECK-NEXT: CondBit: vp<[[MASK2]]> (then.0) ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.if: -; CHECK-NEXT: REPLICATE ir<%gep.c.1> = getelementptr ir<@c>, ir<0>, ir<%iv> +; CHECK-NEXT: REPLICATE ir<%gep.c.1> = getelementptr ir<@c>, ir<0>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE store ir<%mul>, ir<%gep.c.1> ; CHECK-NEXT: Successor(s): pred.store.continue ; CHECK-EMPTY: @@ -726,8 +734,8 @@ ; CHECK-NEXT: Successor(s): latch ; CHECK-EMPTY: ; CHECK-NEXT: latch: -; CHECK-NEXT: CLONE ir<%large> = icmp ir<%iv>, ir<8> -; CHECK-NEXT: CLONE ir<%exitcond> = icmp ir<%iv>, ir<%k> +; CHECK-NEXT: CLONE ir<%large> = icmp vp<[[STEPS]]>, ir<8> +; CHECK-NEXT: CLONE ir<%exitcond> = icmp vp<[[STEPS]]>, ir<%k> ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]> ; CHECK-NEXT: No successors @@ -777,8 +785,9 @@ ; CHECK-NEXT: loop: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; 
CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<0>, ir<1> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> -; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, ir<%iv> +; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, vp<[[STEPS]]> ; CHECK-NEXT: Successor(s): loop.0 ; CHECK-EMPTY: ; CHECK-NEXT: loop.0: @@ -807,8 +816,8 @@ ; CHECK-NEXT: Successor(s): loop.2 ; CHECK-EMPTY: ; CHECK-NEXT: loop.2: -; CHECK-NEXT: CLONE ir<%large> = icmp ir<%iv>, ir<8> -; CHECK-NEXT: CLONE ir<%exitcond> = icmp ir<%iv>, ir<%k> +; CHECK-NEXT: CLONE ir<%large> = icmp vp<[[STEPS]]>, ir<8> +; CHECK-NEXT: CLONE ir<%exitcond> = icmp vp<[[STEPS]]>, ir<%k> ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]> ; CHECK-NEXT: No successors @@ -845,8 +854,9 @@ ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for> = phi ir<0>, ir<%lv.a> +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<0>, ir<1> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> -; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, ir<%iv> +; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, vp<[[STEPS]]> ; CHECK-NEXT: Successor(s): pred.load ; CHECK-EMPTY: ; CHECK-NEXT: pred.load: { @@ -890,8 +900,8 @@ ; CHECK-NEXT: Successor(s): loop.2 ; CHECK-EMPTY: ; CHECK-NEXT: loop.2: -; CHECK-NEXT: CLONE ir<%large> = icmp ir<%iv>, ir<8> -; CHECK-NEXT: CLONE ir<%exitcond> = icmp ir<%iv>, ir<%k> +; CHECK-NEXT: CLONE ir<%large> = icmp vp<[[STEPS]]>, ir<8> +; CHECK-NEXT: CLONE ir<%exitcond> = icmp vp<[[STEPS]]>, ir<%k> ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]> ; CHECK-NEXT: No successors 
@@ -996,8 +1006,8 @@ ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: loop.header: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION -; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next -; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%addr>, ir<%iv> +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<0>, ir<1> +; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%addr>, vp<[[STEPS]]> ; CHECK-NEXT: Successor(s): loop.body ; CHECK-EMPTY: ; CHECK-NEXT: loop.body: @@ -1016,7 +1026,7 @@ ; CHECK-NEXT: CondBit: vp<[[MASK]]> (then) ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.if: -; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%addr>, ir<%iv> +; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%addr>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE store ir<1.000000e+01>, ir<%gep> ; CHECK-NEXT: Successor(s): pred.store.continue ; CHECK-EMPTY: