diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1133,7 +1133,8 @@
         // will lead to gather/scatter instructions, which don't need to be
         // handled.
         if (isa<VPWidenMemoryInstructionRecipe>(CurRec) ||
-            isa<VPInterleaveRecipe>(CurRec) || isa<VPCanonicalIVPHIRecipe>(CurRec))
+            isa<VPInterleaveRecipe>(CurRec) || isa<VPCanonicalIVPHIRecipe>(CurRec) ||
+            isa<VPScalarIVStepsRecipe>(CurRec))
           continue;

         // This recipe contributes to the address computation of a widen
@@ -2765,24 +2766,16 @@
     return;
   }

-  // Try to create a new independent vector induction variable. If we can't
+  // Create a new independent vector induction variable. If we can't
   // create the phi node, we will splat the scalar induction variable in each
   // loop iteration.
-  if (!shouldScalarizeInstruction(EntryVal)) {
-    createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, State);
-    Value *ScalarIV = CreateScalarIV(Step);
-    // Create scalar steps that can be used by instructions we will later
-    // scalarize. Note that the addition of the scalar steps will not increase
-    // the number of instructions in the loop in the common case prior to
-    // InstCombine. We will be trading one vector extract for each scalar step.
-    buildScalarSteps(ScalarIV, Step, EntryVal, ID, Def, State);
-    return;
-  }
-
-  // All IV users are scalar instructions, so only emit a scalar IV, not a
-  // vectorised IV. Except when we tail-fold, then the splat IV feeds the
-  // predicate used by the masked loads/stores.
+  assert(!shouldScalarizeInstruction(EntryVal));
+  createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, State);
   Value *ScalarIV = CreateScalarIV(Step);
+  // Create scalar steps that can be used by instructions we will later
+  // scalarize. Note that the addition of the scalar steps will not increase
+  // the number of instructions in the loop in the common case prior to
+  // InstCombine. We will be trading one vector extract for each scalar step.
   buildScalarSteps(ScalarIV, Step, EntryVal, ID, Def, State);
 }

@@ -7926,12 +7919,15 @@
   // Perform the actual loop transformation.

   // 1. Create a new empty loop. Unlink the old loop and connect the new one.
-  VPTransformState State{BestVF, BestUF, LI, DT, ILV.Builder, &ILV, &BestVPlan};
+  VPTransformState State{BestVF, BestUF, LI,
+                         DT,     PSE.getSE(), ILV.Builder,
+                         &ILV,   &BestVPlan,  ILV.OldInduction};
   Value *CanonicalIVStartValue;
   std::tie(State.CFG.PrevBB, CanonicalIVStartValue) =
       ILV.createVectorizedLoopSkeleton();
   State.TripCount = ILV.getOrCreateTripCount(nullptr);
   State.VectorTripCount = ILV.getOrCreateVectorTripCount(nullptr);
+  State.OldInduction = ILV.OldInduction;
   ILV.collectPoisonGeneratingRecipes(State);

   // When vectorizing the epilogue loop, the canonical induction start value
@@ -8424,6 +8420,11 @@
   VPValue *BTC = Plan->getOrCreateBackedgeTakenCount();
   bool TailFolded = !CM.isScalarEpilogueAllowed();

+  while (NewInsertionPoint != Builder.getInsertBlock()->end() &&
+         isa<VPScalarIVStepsRecipe>(*NewInsertionPoint))
+    ++NewInsertionPoint;
+  Builder.setInsertPoint(Builder.getInsertBlock(), NewInsertionPoint);
+
   if (TailFolded && CM.TTI.emitGetActiveLaneMask()) {
     // While ActiveLaneMask is a binary op that consumes the loop tripcount
     // as a second argument, we only pass the IV here and extract the
@@ -8533,7 +8534,7 @@
   return nullptr;
 }

-VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionTruncate(
+VPRecipeBase *VPRecipeBuilder::tryToOptimizeInductionTruncate(
     TruncInst *I, ArrayRef<VPValue *> Operands, VFRange &Range,
     VPlan &Plan) const {
   // Optimize the special case where the source is a constant integer
@@ -8552,9 +8553,12 @@
   if (LoopVectorizationPlanner::getDecisionAndClampRange(
           isOptimizableIVTruncate(I), Range)) {

-    auto *Phi = cast<PHINode>(I->getOperand(0));
     const InductionDescriptor &II = *Legal->getIntOrFpInductionDescriptor(Phi);
+
+    if (auto *R = tryToBuildScalarSteps(Phi, I, Range, Plan))
+      return R;
+
     VPValue *Start = Plan.getOrAddVPValue(II.getStartValue());
     return new VPWidenIntOrFpInductionRecipe(Phi, Start, II, I);
   }
@@ -8916,6 +8920,37 @@
   return toVPRecipeResult(tryToWiden(Instr, Operands));
 }

+VPScalarIVStepsRecipe *
+VPRecipeBuilder::tryToBuildScalarSteps(Instruction *MaybePhi,
+                                       Instruction *ScalarStepsForI,
+                                       VFRange &Range, VPlan &Plan) const {
+  auto *Phi = dyn_cast<PHINode>(MaybePhi);
+  if (!Phi)
+    return nullptr;
+  auto *II = Legal->getIntOrFpInductionDescriptor(Phi);
+  if (!II)
+    return nullptr;
+
+  if (onlyScalarStepsNeeded(ScalarStepsForI, Range)) {
+    const SCEV *StepSCEV = II->getStep();
+    VPValue *Step = nullptr;
+
+    if (auto *E = dyn_cast<SCEVConstant>(StepSCEV))
+      Step = Plan.getOrAddVPValue(E->getValue());
+    else if (auto *E = dyn_cast<SCEVUnknown>(StepSCEV))
+      Step = Plan.getOrAddVPValue(E->getValue());
+    else
+      Step = new VPExpandSCEVRecipe(II->getStep(), *PSE.getSE());
+
+    if (ScalarStepsForI == Phi)
+      return new VPScalarIVStepsRecipe(Phi, *II, Plan.getCanonicalIV(), Step);
+    return new VPScalarIVStepsRecipe(Phi, *II, ScalarStepsForI,
+                                     Plan.getCanonicalIV(), Step);
+  }
+
+  return nullptr;
+}
+
 void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
                                                         ElementCount MaxVF) {
   assert(OrigLoop->isInnermost() && "Inner loop expected.");
@@ -9075,7 +9110,10 @@
   DFS.perform(LI);

   VPBasicBlock *VPBB = HeaderVPBB;
-  SmallVector<VPWidenIntOrFpInductionRecipe *> InductionsToMove;
+  SmallVector<VPRecipeBase *> InductionsToMove;
+  DenseMap VectorIVs;
+  DenseMap ScalarIVs;
+  SmallVector<VPRecipeBase *> MoveAfterPhis;
   for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) {
     // Relevant instructions from basic block BB will be grouped into VPRecipe
     // ingredients and fill a new VPBasicBlock.
@@ -9102,6 +9140,17 @@
         auto OpRange = Plan->mapToVPValues(Instr->operands());
         Operands = {OpRange.begin(), OpRange.end()};
       }
+
+      if (auto *Steps =
+              RecipeBuilder.tryToBuildScalarSteps(Instr, Instr, Range, *Plan)) {
+        Plan->addVPValue(Instr, Steps);
+        RecipeBuilder.setRecipe(Instr, Steps);
+        MoveAfterPhis.push_back(Steps);
+        if (auto *StepR = dyn_cast_or_null<VPExpandSCEVRecipe>(
+                Steps->getOperand(1)->getDef()))
+          MoveAfterPhis.push_back(StepR);
+        continue;
+      }
       if (auto RecipeOrValue = RecipeBuilder.tryToCreateWidenRecipe(
               Instr, Operands, Range, Plan)) {
         // If Instr can be simplified to an existing VPValue, use it.
@@ -9110,8 +9159,9 @@
           Plan->addVPValue(Instr, VPV);
           // If the re-used value is a recipe, register the recipe for the
           // instruction, in case the recipe for Instr needs to be recorded.
-          if (auto *R = dyn_cast_or_null<VPRecipeBase>(VPV->getDef()))
+          if (auto *R = dyn_cast_or_null<VPRecipeBase>(VPV->getDef())) {
             RecipeBuilder.setRecipe(Instr, R);
+          }
           continue;
         }
         // Otherwise, add the new recipe.
@@ -9121,6 +9171,7 @@
           Plan->addVPValue(UV, Def);

+      RecipeBuilder.setRecipe(Instr, Recipe);
       if (isa<VPWidenIntOrFpInductionRecipe>(Recipe) &&
           HeaderVPBB->getFirstNonPhi() != VPBB->end()) {
@@ -9129,10 +9180,17 @@
         // Keep track of VPWidenIntOrFpInductionRecipes not in the phi section
        // of the header block. That can happen for truncates of induction
        // variables. Those recipes are moved to the phi section of the header
        // block after applying SinkAfter, which relies on the original
        // position of the trunc.
         assert(isa<TruncInst>(Instr));
-        InductionsToMove.push_back(
-            cast<VPWidenIntOrFpInductionRecipe>(Recipe));
+        InductionsToMove.push_back(Recipe);
       }
-      RecipeBuilder.setRecipe(Instr, Recipe);
+
+      if (isa<VPScalarIVStepsRecipe>(Recipe)) {
+        MoveAfterPhis.push_back(Recipe);
+        VPValue *Step = Recipe->getOperand(1);
+        if (auto *StepR = dyn_cast_or_null<VPExpandSCEVRecipe>(Step->getDef()))
+          MoveAfterPhis.push_back(StepR);
+        continue;
+      }
+
       VPBB->appendRecipe(Recipe);
       continue;
     }
@@ -9254,9 +9312,13 @@
   // Now that sink-after is done, move induction recipes for optimized truncates
   // to the phi section of the header block.
-  for (VPWidenIntOrFpInductionRecipe *Ind : InductionsToMove)
+  for (VPRecipeBase *Ind : InductionsToMove)
     Ind->moveBefore(*HeaderVPBB, HeaderVPBB->getFirstNonPhi());

+  for (auto *R : MoveAfterPhis) {
+    R->insertBefore(*HeaderVPBB, HeaderVPBB->getFirstNonPhi());
+  }
+
   // Adjust the recipes for any inloop reductions.
   adjustRecipesForReductions(cast<VPBasicBlock>(TopRegion->getExit()), Plan,
                              RecipeBuilder, Range.Start);
@@ -9749,6 +9811,32 @@
       getTruncInst(), getVPValue(0), State, CanonicalIV);
 }

+void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
+  Value *Step = State.get(getOperand(1), VPIteration(0, 0));
+  auto CreateScalarIV = [&](Value *&Step) -> Value * {
+    Value *ScalarIV = State.get(getOperand(0), VPIteration(0, 0));
+    if (IV != State.OldInduction) {
+      ScalarIV = IV->getType()->isIntegerTy()
+                     ? State.Builder.CreateSExtOrTrunc(ScalarIV, IV->getType())
+                     : State.Builder.CreateCast(Instruction::SIToFP, ScalarIV,
+                                                IV->getType());
+      ScalarIV = emitTransformedIndex(State.Builder, ScalarIV, Step, IndDesc);
+      ScalarIV->setName("offset.idx");
+    }
+    if (Trunc) {
+      auto *TruncType = cast<IntegerType>(Trunc->getType());
+      assert(Step->getType()->isIntegerTy() &&
+             "Truncation requires an integer step");
+      ScalarIV = State.Builder.CreateTrunc(ScalarIV, TruncType);
+      Step = State.Builder.CreateTrunc(Step, TruncType);
+    }
+    return ScalarIV;
+  };
+
+  Value *ScalarIV = CreateScalarIV(Step);
+  buildScalarSteps(ScalarIV, Step, IV, IndDesc, this, State);
+}
+
 void VPWidenPHIRecipe::execute(VPTransformState &State) {
   State.ILV->widenPHIInstruction(cast<PHINode>(getUnderlyingValue()), this,
                                  State);
@@ -10156,7 +10244,8 @@
     // Check if there is a scalar value for the selected lane.
    if (!hasScalarValue(Def, {Part, LastLane})) {
      // At the moment, VPWidenIntOrFpInductionRecipes can also be uniform.
-     assert(isa<VPWidenIntOrFpInductionRecipe>(Def->getDef()) &&
+     assert((isa<VPWidenIntOrFpInductionRecipe>(Def->getDef()) ||
+             isa<VPScalarIVStepsRecipe>(Def->getDef())) &&
             "unexpected recipe found to be invariant");
      IsUniform = true;
      LastLane = 0;
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -79,9 +79,10 @@
   /// Optimize the special case where the operand of \p I is a constant integer
   /// induction variable.
-  VPWidenIntOrFpInductionRecipe *
-  tryToOptimizeInductionTruncate(TruncInst *I, ArrayRef<VPValue *> Operands,
-                                 VFRange &Range, VPlan &Plan) const;
+  VPRecipeBase *tryToOptimizeInductionTruncate(TruncInst *I,
+                                               ArrayRef<VPValue *> Operands,
+                                               VFRange &Range,
+                                               VPlan &Plan) const;

   /// Handle non-loop phi nodes. Return a VPValue, if all incoming values match
   /// or a new VPBlendRecipe otherwise. Currently all such phi nodes are turned
@@ -171,6 +172,10 @@
       Instruction *I, VFRange &Range, VPBasicBlock *VPBB, VPlanPtr &Plan);

+  VPScalarIVStepsRecipe *tryToBuildScalarSteps(Instruction *Instr,
+                                               Instruction *ScalarStepsForI,
+                                               VFRange &Range,
+                                               VPlan &Plan) const;
   /// Add the incoming values from the backedge to reduction & first-order
   /// recurrence cross-iteration phis.
   void fixHeaderPhis();
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -210,10 +210,11 @@
 /// needed for generating the output IR.
 struct VPTransformState {
   VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI,
-                   DominatorTree *DT, IRBuilder<> &Builder,
-                   InnerLoopVectorizer *ILV, VPlan *Plan)
-      : VF(VF), UF(UF), Instance(), LI(LI), DT(DT), Builder(Builder), ILV(ILV),
-        Plan(Plan) {}
+                   DominatorTree *DT, ScalarEvolution *SE, IRBuilder<> &Builder,
+                   InnerLoopVectorizer *ILV, VPlan *Plan,
+                   Instruction *OldInduction)
+      : VF(VF), UF(UF), Instance(), LI(LI), DT(DT), SE(SE), Builder(Builder),
+        ILV(ILV), Plan(Plan), OldInduction(OldInduction) {}

   /// The chosen Vectorization and Unroll Factors of the loop being vectorized.
   ElementCount VF;
@@ -347,6 +348,8 @@
   /// Hold a pointer to Dominator Tree to register new basic blocks in the loop.
   DominatorTree *DT;

+  ScalarEvolution *SE;
+
   /// Hold a reference to the IRBuilder used to generate output IR code.
   IRBuilder<> &Builder;
@@ -367,6 +370,8 @@
   /// Holds recipes that may generate a poison value that is used after
   /// vectorization, even when their operands are not poison.
   SmallPtrSet<VPRecipeBase *, 16> MayGeneratePoisonRecipes;
+
+  Instruction *OldInduction;
 };

 /// VPUsers instance used by VPBlockBase to manage CondBit and the block
@@ -713,6 +718,7 @@
   /// Insert an unlinked recipe into a basic block immediately before
   /// the specified recipe.
   void insertBefore(VPRecipeBase *InsertPos);
+  void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);

   /// Insert an unlinked Recipe into a basic block immediately after
   /// the specified Recipe.
@@ -1085,6 +1091,42 @@
   const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
 };

+/// A recipe for handling phi nodes of integer and floating-point inductions,
+/// producing their scalar values.
+class VPScalarIVStepsRecipe : public VPRecipeBase, public VPValue {
+  PHINode *IV;
+  const InductionDescriptor &IndDesc;
+  Instruction *Trunc = nullptr;
+
+public:
+  VPScalarIVStepsRecipe(PHINode *IV, const InductionDescriptor &IndDesc,
+                        VPValue *CanonicalIV, VPValue *Step)
+      : VPRecipeBase(VPScalarIVStepsSC, {CanonicalIV, Step}), VPValue(IV, this),
+        IV(IV), IndDesc(IndDesc) {}
+
+  VPScalarIVStepsRecipe(PHINode *IV, const InductionDescriptor &IndDesc,
+                        Instruction *Trunc, VPValue *CanonicalIV, VPValue *Step)
+      : VPRecipeBase(VPScalarIVStepsSC, {CanonicalIV, Step}),
+        VPValue(Trunc, this), IV(IV), IndDesc(IndDesc), Trunc(Trunc) {}
+
+  ~VPScalarIVStepsRecipe() override = default;
+
+  /// Method to support type inquiry through isa, cast, and dyn_cast.
+  static inline bool classof(const VPDef *D) {
+    return D->getVPDefID() == VPRecipeBase::VPScalarIVStepsSC;
+  }
+
+  /// Generate the scalarized versions of the phi node as needed by their
+  /// users.
+  void execute(VPTransformState &State) override;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  /// Print the recipe.
+  void print(raw_ostream &O, const Twine &Indent,
+             VPSlotTracker &SlotTracker) const override;
+#endif
+};
+
 /// A recipe for handling first order recurrences and pointer inductions. For
 /// first-order recurrences, the start value is the first operand of the recipe
 /// and the incoming value from the backedge is the second operand. It also
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -631,14 +631,19 @@
   insertAfter(InsertPos);
 }

-void VPRecipeBase::moveBefore(VPBasicBlock &BB,
-                              iplist<VPRecipeBase>::iterator I) {
+void VPRecipeBase::insertBefore(VPBasicBlock &BB,
+                                iplist<VPRecipeBase>::iterator I) {
   assert(I == BB.end() || I->getParent() == &BB);
-  removeFromParent();
   Parent = &BB;
   BB.getRecipeList().insert(I, this);
 }

+void VPRecipeBase::moveBefore(VPBasicBlock &BB,
+                              iplist<VPRecipeBase>::iterator I) {
+  removeFromParent();
+  insertBefore(BB, I);
+}
+
 void VPInstruction::generateInstruction(VPTransformState &State,
                                         unsigned Part) {
   IRBuilder<> &Builder = State.Builder;
@@ -1229,6 +1234,12 @@
   O << " " << VPlanIngredient(IV);
 }

+void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent,
+                                  VPSlotTracker &SlotTracker) const {
+  O << Indent << "SCALAR-STEPS ";
+  printOperands(O, SlotTracker);
+}
+
 void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
   O << Indent << "WIDEN-GEP ";
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -332,6 +332,7 @@
     VPWidenSC,
     VPWidenSelectSC,
     VPStepVectorSC,
+    VPScalarIVStepsSC,

     // Phi-like recipes. Need to be kept together.
     VPBlendSC,
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
@@ -11,9 +11,9 @@
 ; CHECK-NEXT: vector loop: {
 ; CHECK-NEXT: loop.body:
 ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
-; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next
 ; CHECK-NEXT: WIDEN-PHI %ptr.iv.1 = phi %start.1, %ptr.iv.1.next
 ; CHECK-NEXT: WIDEN-PHI %ptr.iv.2 = phi %start.2, %ptr.iv.2.next
+; CHECK-NEXT: SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
 ; CHECK-NEXT: WIDEN-GEP Var[Inv] ir<%ptr.iv.2.next> = getelementptr ir<%ptr.iv.2>, ir<1>
 ; CHECK-NEXT: WIDEN store ir<%ptr.iv.1>, ir<%ptr.iv.2.next>
 ; CHECK-NEXT: WIDEN ir<%lv> = load ir<%ptr.iv.2>
@@ -46,8 +46,7 @@
 ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i8* [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8*, i8** [[START_1]], i64 [[TMP5]]
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8*, i8** [[START_1]], i64 [[TMP4]]
 ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 2
 ; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 1
@@ -59,6 +58,7 @@
 ; CHECK-NEXT: [[TMP12:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP11]]
 ; CHECK-NEXT: [[VECTOR_GEP:%.*]] = mul <vscale x 2 x i64> [[TMP12]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP]]
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, <vscale x 2 x i8*> [[TMP13]], i64 1
 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8*, i8** [[NEXT_GEP]], i32 0
 ; CHECK-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to <vscale x 2 x i8*>*
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-scalar-epilogue-fallback.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-scalar-epilogue-fallback.ll
--- a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-scalar-epilogue-fallback.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-scalar-epilogue-fallback.ll
@@ -23,10 +23,10 @@
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[PTR]], i32 [[TMP0]]
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[SIZE]], [[INDEX]]
-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 0
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[PTR]], i32 [[TMP1]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 0
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP]], i32 1
 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 0
 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <16 x i8>*
@@ -36,7 +36,7 @@
 ; CHECK-NEXT: store <16 x i8> [[WIDE_LOAD]], <16 x i8>* [[TMP6]], align 1
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
+; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[SIZE]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]]
@@ -52,7 +52,7 @@
 ; CHECK-NEXT: [[TMP8:%.*]] = load i8, i8* [[INCDEC_PTR]], align 1
 ; CHECK-NEXT: store i8 [[TMP8]], i8* [[BUFF]], align 1
 ; CHECK-NEXT: [[TOBOOL11:%.*]] = icmp eq i32 [[DEC]], 0
-; CHECK-NEXT: br i1 [[TOBOOL11]], label [[END]], label [[BODY]], !llvm.loop !2
+; CHECK-NEXT: br i1 [[TOBOOL11]], label [[END]], label [[BODY]], !llvm.loop [[LOOP2:![0-9]+]]
 ; CHECK: end:
 ; CHECK-NEXT: [[INCDEC_PTR_LCSSA:%.*]] = phi i8* [ [[INCDEC_PTR]], [[BODY]] ], [ [[IND_END2]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT: store i8* [[INCDEC_PTR_LCSSA]], i8** [[POS]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
--- a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
@@ -147,27 +147,27 @@
 ; CHECK-NEXT: br label [[VECTOR_BODY9:%.*]]
 ; CHECK: vector.body9:
 ; CHECK-NEXT: [[INDEX38:%.*]] = phi i64 [ 0, [[VECTOR_PH10]] ], [ [[INDEX_NEXT37:%.*]], [[PRED_STORE_CONTINUE36:%.*]] ]
-; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[I_0_LCSSA]], [[INDEX38]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT27:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX38]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT28:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT27]], <4 x i64> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[VEC_IV:%.*]] = or <4 x i64> [[BROADCAST_SPLAT28]], <i64 0, i64 1, i64 2, i64 3>
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT21:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX38]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT22:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT21]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[VEC_IV:%.*]] = or <4 x i64> [[BROADCAST_SPLAT22]], <i64 0, i64 1, i64 2, i64 3>
+; CHECK-NEXT: [[OFFSET_IDX25:%.*]] = add i64 [[I_0_LCSSA]], [[INDEX38]]
 ; CHECK-NEXT: [[TMP20:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT19]]
 ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP20]], i64 0
 ; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_STORE_IF29:%.*]], label [[PRED_STORE_CONTINUE30:%.*]]
 ; CHECK: pred.store.if24:
-; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[OFFSET_IDX25]]
 ; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4
-; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[OFFSET_IDX25]]
 ; CHECK-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4
 ; CHECK-NEXT: [[TMP26:%.*]] = and i32 [[TMP25]], [[TMP23]]
-; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[OFFSET_IDX25]]
 ; CHECK-NEXT: store i32 [[TMP26]], i32* [[TMP27]], align 4
 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE30]]
 ; CHECK: pred.store.continue25:
 ; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP20]], i64 1
 ; CHECK-NEXT: br i1 [[TMP28]], label [[PRED_STORE_IF31:%.*]], label [[PRED_STORE_CONTINUE32:%.*]]
 ; CHECK: pred.store.if26:
-; CHECK-NEXT: [[TMP29:%.*]] = add i64 [[OFFSET_IDX]], 1
+; CHECK-NEXT: [[TMP29:%.*]] = add i64 [[OFFSET_IDX25]], 1
 ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[TMP29]]
 ; CHECK-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4
 ; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[TMP29]]
@@ -180,7 +180,7 @@
 ; CHECK-NEXT: [[TMP36:%.*]] = extractelement <4 x i1> [[TMP20]], i64 2
 ; CHECK-NEXT: br i1 [[TMP36]], label [[PRED_STORE_IF33:%.*]], label [[PRED_STORE_CONTINUE34:%.*]]
 ; CHECK: pred.store.if28:
-; CHECK-NEXT: [[TMP37:%.*]] = add i64 [[OFFSET_IDX]], 2
+; CHECK-NEXT: [[TMP37:%.*]] = add i64 [[OFFSET_IDX25]], 2
 ; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[TMP37]]
 ; CHECK-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4
 ; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[TMP37]]
@@ -193,7 +193,7 @@
 ; CHECK-NEXT: [[TMP44:%.*]] = extractelement <4 x i1> [[TMP20]], i64 3
 ; CHECK-NEXT: br i1 [[TMP44]], label [[PRED_STORE_IF35:%.*]], label [[PRED_STORE_CONTINUE36]]
 ; CHECK: pred.store.if30:
-; CHECK-NEXT: [[TMP45:%.*]] = add i64 [[OFFSET_IDX]], 3
+; CHECK-NEXT: [[TMP45:%.*]] = add i64 [[OFFSET_IDX25]], 3
 ; CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[TMP45]]
 ; CHECK-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4
 ; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[TMP45]]
@@ -277,16 +277,16 @@
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE21:%.*]] ]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT14:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT15:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT14]], <4 x i64> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[VEC_IV:%.*]] = or <4 x i64> [[BROADCAST_SPLAT15]], <i64 0, i64 1, i64 2, i64 3>
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT12:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT13:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT12]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[VEC_IV:%.*]] = or <4 x i64> [[BROADCAST_SPLAT13]], <i64 0, i64 1, i64 2, i64 3>
 ; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP4]], i64 0
 ; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK: pred.store.if:
 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[NEXT_GEP10:%.*]] = getelementptr i32, i32* [[Q:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[NEXT_GEP10]], align 16
+; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i32, i32* [[Q:%.*]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[NEXT_GEP8]], align 16
 ; CHECK-NEXT: store i32 [[TMP6]], i32* [[NEXT_GEP]], align 16
 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
 ; CHECK: pred.store.continue:
@@ -294,33 +294,33 @@
 ; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF16:%.*]], label [[PRED_STORE_CONTINUE17:%.*]]
 ; CHECK: pred.store.if14:
 ; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[INDEX]], 1
-; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP8]]
+; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP8]]
 ; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[INDEX]], 1
-; CHECK-NEXT: [[NEXT_GEP11:%.*]] = getelementptr i32, i32* [[Q]], i64 [[TMP9]]
-; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[NEXT_GEP11]], align 16
-; CHECK-NEXT: store i32 [[TMP10]], i32* [[NEXT_GEP7]], align 16
+; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i32, i32* [[Q]], i64 [[TMP9]]
+; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[NEXT_GEP9]], align 16
+; CHECK-NEXT: store i32 [[TMP10]], i32* [[NEXT_GEP5]], align 16
 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE17]]
 ; CHECK: pred.store.continue15:
 ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP4]], i64 2
 ; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF18:%.*]], label [[PRED_STORE_CONTINUE19:%.*]]
 ; CHECK: pred.store.if16:
 ; CHECK-NEXT: [[TMP12:%.*]] = or i64 [[INDEX]], 2
-; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP12]]
+; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP12]]
 ; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[INDEX]], 2
-; CHECK-NEXT: [[NEXT_GEP12:%.*]] = getelementptr i32, i32* [[Q]], i64 [[TMP13]]
-; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[NEXT_GEP12]], align 16
-; CHECK-NEXT: store i32 [[TMP14]], i32* [[NEXT_GEP8]], align 16
+; CHECK-NEXT: [[NEXT_GEP10:%.*]] = getelementptr i32, i32* [[Q]], i64 [[TMP13]]
+; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[NEXT_GEP10]], align 16
+; CHECK-NEXT: store i32 [[TMP14]], i32* [[NEXT_GEP6]], align 16
 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE19]]
 ; CHECK: pred.store.continue17:
 ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP4]], i64 3
 ; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF20:%.*]], label [[PRED_STORE_CONTINUE21]]
 ; CHECK: pred.store.if18:
 ; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[INDEX]], 3
-; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP16]]
+; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP16]]
 ; CHECK-NEXT: [[TMP17:%.*]] = or i64 [[INDEX]], 3
-; CHECK-NEXT: [[NEXT_GEP13:%.*]] = getelementptr i32, i32* [[Q]], i64 [[TMP17]]
-; CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* [[NEXT_GEP13]], align 16
-; CHECK-NEXT: store i32 [[TMP18]], i32* [[NEXT_GEP9]], align 16
+; CHECK-NEXT: [[NEXT_GEP11:%.*]] = getelementptr i32, i32* [[Q]], i64 [[TMP17]]
+; CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* [[NEXT_GEP11]], align 16
+; CHECK-NEXT: store i32 [[TMP18]], i32* [[NEXT_GEP7]], align 16
 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE21]]
 ; CHECK: pred.store.continue19:
 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
@@ -162,8 +162,8 @@
 ; CHECK-NEXT: loop:
 ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next>
-; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next
 ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%and.red> = phi ir<1234>, ir<%and.red.next>
+; CHECK-NEXT: SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
 ; CHECK-NEXT: EMIT vp<[[STEPVEC:%.+]]> = step-vector vp<[[CAN_IV]]> ir<1>
 ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule vp<[[STEPVEC]]> vp<[[BTC]]>
 ; CHECK-NEXT: Successor(s): loop.0
diff --git a/llvm/test/Transforms/LoopVectorize/float-induction.ll b/llvm/test/Transforms/LoopVectorize/float-induction.ll
--- a/llvm/test/Transforms/LoopVectorize/float-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/float-induction.ll
@@ -1471,8 +1471,8 @@
 ; VEC4_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]]
 ; VEC4_INTERL2: vector.body:
 ; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE17:%.*]] ]
-; VEC4_INTERL2-NEXT: [[TMP0:%.*]] = or i64 [[INDEX]], 4
-; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = sitofp i64 [[INDEX]] to float
+; VEC4_INTERL2-NEXT: [[TMP0:%.*]] = sitofp i64 [[INDEX]] to float
+; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = or i64 [[INDEX]], 4
 ; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
 ; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <4 x float>*
 ; VEC4_INTERL2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
@@ -1485,13 +1485,13 @@
 ; VEC4_INTERL2-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; VEC4_INTERL2: pred.store.if:
 ; VEC4_INTERL2-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX]]
-; VEC4_INTERL2-NEXT: store float [[TMP1]], float* [[TMP9]], align 4
+; VEC4_INTERL2-NEXT: store float [[TMP0]], float* [[TMP9]], align 4
 ; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE]]
 ; VEC4_INTERL2: pred.store.continue:
 ; VEC4_INTERL2-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP6]], i64 1
 ; VEC4_INTERL2-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
 ; VEC4_INTERL2: pred.store.if4:
-; VEC4_INTERL2-NEXT: [[TMP11:%.*]] = fadd fast float [[TMP1]], 1.000000e+00
+; VEC4_INTERL2-NEXT: [[TMP11:%.*]] = fadd fast float [[TMP0]], 1.000000e+00
 ; VEC4_INTERL2-NEXT: [[TMP12:%.*]] = or i64 [[INDEX]], 1
 ; VEC4_INTERL2-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP12]]
 ; VEC4_INTERL2-NEXT: store float [[TMP11]], float* [[TMP13]], align 4
@@ -1500,7 +1500,7 @@
 ; VEC4_INTERL2-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP6]], i64 2
 ; VEC4_INTERL2-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]]
 ; VEC4_INTERL2: pred.store.if6:
-; VEC4_INTERL2-NEXT: [[TMP15:%.*]] = fadd fast float [[TMP1]], 2.000000e+00
+; VEC4_INTERL2-NEXT: [[TMP15:%.*]] = fadd fast float [[TMP0]], 2.000000e+00
 ; VEC4_INTERL2-NEXT: [[TMP16:%.*]] = or i64 [[INDEX]], 2
 ; VEC4_INTERL2-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP16]]
 ; VEC4_INTERL2-NEXT: store float [[TMP15]], float* [[TMP17]], align 4
@@ -1509,7 +1509,7 @@
 ; VEC4_INTERL2-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP6]], i64 3
 ; VEC4_INTERL2-NEXT: br i1 [[TMP18]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9:%.*]]
 ; VEC4_INTERL2: pred.store.if8:
-; VEC4_INTERL2-NEXT: [[TMP19:%.*]] = fadd fast float [[TMP1]], 3.000000e+00
+; VEC4_INTERL2-NEXT: [[TMP19:%.*]] = fadd fast float [[TMP0]], 3.000000e+00
 ; VEC4_INTERL2-NEXT: [[TMP20:%.*]] = or i64 [[INDEX]], 3
 ; VEC4_INTERL2-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP20]]
 ; VEC4_INTERL2-NEXT: store float [[TMP19]], float* [[TMP21]], align 4
@@ -1518,15 +1518,15 @@
 ; VEC4_INTERL2-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[TMP7]], i64 0
 ; VEC4_INTERL2-NEXT: br i1 [[TMP22]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]]
 ; VEC4_INTERL2: pred.store.if10:
-; VEC4_INTERL2-NEXT: [[TMP23:%.*]] = fadd fast float [[TMP1]], 4.000000e+00
-; VEC4_INTERL2-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP0]]
+; VEC4_INTERL2-NEXT: [[TMP23:%.*]] = fadd fast float [[TMP0]], 4.000000e+00
+; VEC4_INTERL2-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP1]]
 ; VEC4_INTERL2-NEXT: store float [[TMP23]], float* [[TMP24]], align 4
 ; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE11]]
 ; VEC4_INTERL2: pred.store.continue11:
 ; VEC4_INTERL2-NEXT: [[TMP25:%.*]] = extractelement <4 x i1> [[TMP7]], i64 1
 ; VEC4_INTERL2-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]]
 ; VEC4_INTERL2: pred.store.if12:
-; VEC4_INTERL2-NEXT: [[TMP26:%.*]] = fadd fast float [[TMP1]], 5.000000e+00
+; VEC4_INTERL2-NEXT: [[TMP26:%.*]] = fadd fast float [[TMP0]], 5.000000e+00
 ; VEC4_INTERL2-NEXT: [[TMP27:%.*]] = or i64 [[INDEX]], 5
 ; VEC4_INTERL2-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP27]]
 ; VEC4_INTERL2-NEXT: store float [[TMP26]], float* [[TMP28]], align 4
@@ -1535,7 +1535,7 @@
 ; VEC4_INTERL2-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP7]], i64 2
 ; VEC4_INTERL2-NEXT: br i1 [[TMP29]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15:%.*]]
 ; VEC4_INTERL2: pred.store.if14:
-; VEC4_INTERL2-NEXT: [[TMP30:%.*]] = fadd fast float [[TMP1]], 6.000000e+00
+; VEC4_INTERL2-NEXT: [[TMP30:%.*]] = fadd fast float [[TMP0]], 6.000000e+00
 ; VEC4_INTERL2-NEXT: [[TMP31:%.*]] = or i64 [[INDEX]], 6
 ; VEC4_INTERL2-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP31]]
 ; VEC4_INTERL2-NEXT: store float [[TMP30]], float* [[TMP32]], align 4
@@ -1544,7 +1544,7 @@
 ; VEC4_INTERL2-NEXT: [[TMP33:%.*]] = extractelement <4 x i1> [[TMP7]], i64 3
 ; VEC4_INTERL2-NEXT: br i1 [[TMP33]], label [[PRED_STORE_IF16:%.*]], label [[PRED_STORE_CONTINUE17]]
 ; VEC4_INTERL2: pred.store.if16:
-; VEC4_INTERL2-NEXT: [[TMP34:%.*]] = fadd fast float [[TMP1]], 7.000000e+00
+; VEC4_INTERL2-NEXT: [[TMP34:%.*]] = fadd fast float [[TMP0]], 7.000000e+00
 ; VEC4_INTERL2-NEXT: [[TMP35:%.*]] = or i64 [[INDEX]], 7
 ; VEC4_INTERL2-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP35]]
 ; VEC4_INTERL2-NEXT: store float [[TMP34]], float* [[TMP36]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
--- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
@@ -548,10 +548,10 @@
 ; VEC-NEXT: br label [[VECTOR_BODY:%.*]]
 ; VEC: vector.body:
 ; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ]
-; VEC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; VEC-NEXT: [[OFFSET_IDX:%.*]] = sub i64 undef, [[INDEX]]
-; VEC-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VEC-NEXT: [[TMP2:%.*]] = getelementptr i8, i8* undef, i64 [[TMP0]]
+; VEC-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
+; VEC-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
+; VEC-NEXT: [[TMP2:%.*]] = getelementptr i8, i8* undef, i64 [[TMP1]]
 ; VEC-NEXT: [[TMP3:%.*]] = getelementptr i8, i8* [[TMP2]], i32 0
 ; VEC-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <2 x i8>*
 ; VEC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, <2 x i8>* [[TMP4]], align 1
@@ -561,7 +561,7 @@
 ; VEC-NEXT: [[TMP6:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 0
 ; VEC-NEXT: [[TMP7:%.*]] = zext i8 [[TMP6]] to i32
 ; VEC-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
-; VEC-NEXT: [[TMP9:%.*]] = getelementptr i8, i8* undef, i64 [[TMP0]]
+; VEC-NEXT: [[TMP9:%.*]] = getelementptr i8, i8* undef, i64 [[TMP1]]
 ; VEC-NEXT: store i8 [[TMP8]], i8* [[TMP9]], align 1
 ; VEC-NEXT: br label [[PRED_STORE_CONTINUE]]
 ; VEC: pred.store.continue:
diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll
--- a/llvm/test/Transforms/LoopVectorize/induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction.ll
@@ -159,9 +159,9 @@
 ; UNROLL-NO-IC: vector.body:
 ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
 ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 2
-; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP3]]
 ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP4]]
 ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0
@@ -3826,9 +3826,9 @@
 ; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = add i8 [[OFFSET_IDX]], 1
 ; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = add i8 [[OFFSET_IDX]], 2
 ; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = add i8 [[OFFSET_IDX]], 3
+; UNROLL-NO-IC-NEXT: [[STEP_ADD9:%.*]] = add <2 x i32> [[VEC_IND8]], <i32 2, i32 2>
 ; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = add i32 [[INDEX]], 0
 ; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = add i32 [[INDEX]], 2
-; UNROLL-NO-IC-NEXT: [[STEP_ADD9:%.*]] = add <2 x i32> [[VEC_IND8]], <i32 2, i32 2>
 ; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i8 [[TMP20]]
 ; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, i32* [[A]], i8 [[TMP22]]
 ; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i32 0
@@ -5277,10 +5277,10 @@
 ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT: [[VEC_IND3:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
 ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
 ; UNROLL-NO-IC-NEXT: [[STEP_ADD4:%.*]] = add <2 x i32> [[VEC_IND3]], <i32 2, i32 2>
+; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP0]]
 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP1]]
 ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 0
@@ -6258,17 +6258,17 @@
 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND2:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_IND2]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDEX]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND2]], <2 x i32> <i32 1, i32 2>
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32
-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDEX]] to i32
-; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], 1
-; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND2]], <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], 0
 ; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[SRC:%.*]], align 4
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i32 0
 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP6:%.*]] = mul nsw <2 x i32> [[BROADCAST_SPLAT]], [[TMP4]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[DST:%.*]], i32 [[TMP2]]
+; CHECK-NEXT: [[TMP6:%.*]] = mul nsw <2 x i32> [[BROADCAST_SPLAT]], [[TMP3]]
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[DST:%.*]], i32 [[TMP1]]
 ; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i32> [[VEC_IND2]], [[TMP6]]
 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, i32* [[TMP7]], i32 0
 ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <2 x i32>*
@@ -6391,27 +6391,27 @@
 ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[STEP_ADD4:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT: [[VEC_IND3:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
-; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32
-; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0
-; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 2
 ; UNROLL-NO-IC-NEXT: [[STEP_ADD4]] = add <2 x i32> [[VEC_IND3]], <i32 2, i32 2>
-; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = trunc i64 [[INDEX]] to i32
-; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 0
-; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i32 [[TMP2]], 1
-; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i32 [[TMP2]], 2
-; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i32 [[TMP2]], 3
-; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND3]], <2 x i32> <i32 1, i32 2>
-; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[VEC_IND3]], <2 x i32> [[STEP_ADD4]], <2 x i32> <i32 1, i32 2>
+; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = trunc i64 [[INDEX]] to i32
+; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 0
+; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], 1
+; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], 2
+; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i32 [[TMP0]], 3
+; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND3]], <2 x i32> <i32 1, i32 2>
+; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[VEC_IND3]], <2 x i32> [[STEP_ADD4]], <2 x i32> <i32 1, i32 2>
+; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32
+; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i32 [[OFFSET_IDX]], 0
+; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add i32 [[OFFSET_IDX]], 2
 ; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = load i32, i32* [[SRC:%.*]], align 4
 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP9]], i32 0
 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = load i32, i32* [[SRC]], align 4
 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <2 x i32> poison, i32 [[TMP10]], i32 0
 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT7]], <2 x i32> poison, <2 x i32> zeroinitializer
-; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = mul nsw <2 x i32> [[BROADCAST_SPLAT]], [[TMP7]]
-; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = mul nsw <2 x i32> [[BROADCAST_SPLAT8]], [[TMP8]]
-; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[DST:%.*]], i32 [[TMP3]]
-; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = getelementptr i32, i32* [[DST]], i32 [[TMP5]]
+; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = mul nsw <2 x i32> [[BROADCAST_SPLAT]], [[TMP5]]
+; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = mul nsw <2 x i32> [[BROADCAST_SPLAT8]], [[TMP6]]
+; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[DST:%.*]], i32 [[TMP1]]
+; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = getelementptr i32, i32* [[DST]], i32 [[TMP3]]
 ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = add <2 x i32> [[VEC_IND3]], [[TMP11]]
 ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = add <2 x i32> [[STEP_ADD4]], [[TMP12]]
 ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = getelementptr i32, i32* [[TMP13]], i32 0
@@ -6585,12 +6585,12 @@
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_IND]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND]], <2 x i32> <i32 1, i32 2>
-; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 [[TMP21]]
+; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND]], <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 [[TMP22]]
 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i32 0
 ; CHECK-NEXT: [[TMP25:%.*]] = bitcast i32* [[TMP24]] to <2 x i32>*
-; CHECK-NEXT: store <2 x i32> [[TMP22]], <2 x i32>* [[TMP25]], align 4
+; CHECK-NEXT: store <2 x i32> [[TMP21]], <2 x i32>* [[TMP25]], align 4
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], [[DOTSPLAT3]]
 ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -6817,19 +6817,19 @@
 ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 0
-; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = add i64 [[INDEX]], 2
 ; UNROLL-NO-IC-NEXT: [[STEP_ADD]] = add <2 x i32> [[VEC_IND]], [[DOTSPLAT3]]
-; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND]], <2 x i32> <i32 1, i32 2>
-; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = shufflevector <2 x i32> [[VEC_IND]], <2 x i32> [[STEP_ADD]], <2 x i32> <i32 1, i32 2>
-; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 [[TMP21]]
-; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 [[TMP22]]
+; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND]], <2 x i32> <i32 1, i32 2>
+; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = shufflevector <2 x i32> [[VEC_IND]], <2 x i32> [[STEP_ADD]], <2 x i32> <i32 1, i32 2>
+; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = add i64 [[INDEX]], 0
+; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = add i64 [[INDEX]], 2
+; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 [[TMP23]]
+; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 [[TMP24]]
 ; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, i32* [[TMP25]], i32 0
 ; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = bitcast i32* [[TMP27]] to <2 x i32>*
-; UNROLL-NO-IC-NEXT: store <2 x i32> [[TMP23]], <2 x i32>* [[TMP28]], align 4
+; UNROLL-NO-IC-NEXT: store <2 x i32> [[TMP21]], <2 x i32>* [[TMP28]], align 4
 ; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, i32* [[TMP25]], i32 2
 ; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = bitcast i32* [[TMP29]] to <2 x i32>*
-; UNROLL-NO-IC-NEXT: store <2 x i32> [[TMP24]], <2 x i32>* [[TMP30]], align 4
+; UNROLL-NO-IC-NEXT: store <2 x i32> [[TMP22]], <2 x i32>* [[TMP30]], align 4
 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], [[DOTSPLAT3]]
 ; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll
--- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll
@@ -190,10 +190,10 @@
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32
-; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP14:%.*]] = xor i32 [[TMP13]], -1
+; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[OFFSET_IDX]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP14:%.*]] = xor i32 [[TMP12]], -1
 ; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], [[N]]
 ; CHECK-NEXT: [[TMP16:%.*]] = sext i32 [[TMP15]] to i64
 ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[TMP16]]
@@ -203,7 +203,7 @@
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP20]], align 4
 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT: [[TMP21:%.*]] = fadd fast <4 x float> [[REVERSE]],
-; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[TMP12]]
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[TMP13]]
 ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, float* [[TMP22]], i32 0
 ; CHECK-NEXT: [[TMP24:%.*]] = bitcast float* [[TMP23]] to <4 x float>*
 ; CHECK-NEXT: store <4 x float> [[TMP21]], <4 x float>* [[TMP24]], align 4
@@ -226,10 +226,10 @@
 ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
 ; CHECK: vec.epilog.vector.body:
 ; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT5:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP26:%.*]] = add i64 [[INDEX4]], 0
 ; CHECK-NEXT: [[OFFSET_IDX9:%.*]] = trunc i64 [[INDEX4]] to i32
-; CHECK-NEXT: [[TMP27:%.*]] = add i32 [[OFFSET_IDX9]], 0
-; CHECK-NEXT: [[TMP28:%.*]] = xor i32 [[TMP27]], -1
+; CHECK-NEXT: [[TMP26:%.*]] = add i32 [[OFFSET_IDX9]], 0
+; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[INDEX4]], 0
+; CHECK-NEXT: [[TMP28:%.*]] = xor i32 [[TMP26]], -1
 ; CHECK-NEXT: [[TMP29:%.*]] = add i32 [[TMP28]], [[N]]
 ; CHECK-NEXT: [[TMP30:%.*]] = sext i32 [[TMP29]] to i64
 ; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP30]]
@@ -239,7 +239,7 @@
 ; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, <4 x float>* [[TMP34]], align 4
 ; CHECK-NEXT: [[REVERSE11:%.*]] = shufflevector <4 x float> [[WIDE_LOAD10]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT: [[TMP35:%.*]] = fadd fast <4 x float> [[REVERSE11]],
-; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP26]]
+; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP27]]
 ; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds float, float* [[TMP36]], i32 0
 ; CHECK-NEXT: [[TMP38:%.*]] = bitcast float* [[TMP37]] to <4 x float>*
 ; CHECK-NEXT: store <4 x float> [[TMP35]], <4 x float>* [[TMP38]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
--- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
@@ -145,14 +145,14 @@
 ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i8* [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8*, i8** [[START_1]], i64 [[TMP1]]
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, i8* [[POINTER_PHI]], <4 x i64> <i64 0, i64 1, i64 2, i64 3>
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, <4 x i8*> [[TMP2]], i64 1
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8*, i8** [[START_1]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[POINTER_PHI]], <4 x i64> <i64 0, i64 1, i64 2, i64 3>
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, <4 x i8*> [[TMP1]], i64 1
 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8*, i8** [[NEXT_GEP]], i32 0
 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to <4 x i8*>*
 ; CHECK-NEXT: store <4 x i8*> [[TMP3]], <4 x i8*>* [[TMP5]], align 8
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i8*> [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i8*> [[TMP1]], i32 0
 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[TMP6]], i32 0
 ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to <4 x i8>*
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP8]], align 1
diff --git a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll
--- a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll
@@ -29,35 +29,35 @@
 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE12:%.*]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE5:%.*]] ]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0
+; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[VEC_IV:%.*]] = add <2 x i64> [[BROADCAST_SPLAT2]], <i64 0, i64 1>
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], [[INC]]
-; CHECK-NEXT: [[TMP4:%.*]] = mul i64 0, [[INC]]
-; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], [[TMP4]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT3]], <2 x i64> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[VEC_IV:%.*]] = add <2 x i64> [[BROADCAST_SPLAT4]], <i64 0, i64 1>
-; CHECK-NEXT: [[OFFSET_IDX5:%.*]] = mul i64 [[INDEX]], [[INC]]
-; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[OFFSET_IDX5]] to i8
-; CHECK-NEXT: [[TMP7:%.*]] = trunc i64 [[INC]] to i8
-; CHECK-NEXT: [[TMP9:%.*]] = mul i8 0, [[TMP7]]
-; CHECK-NEXT: [[TMP10:%.*]] = add i8 [[TMP6]], [[TMP9]]
-; CHECK-NEXT: [[TMP11:%.*]] = icmp ule <2 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP11]], i32 0
-; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[OFFSET_IDX]] to i8
+; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[INC]] to i8
+; CHECK-NEXT: [[TMP5:%.*]] = mul i8 0, [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = add i8 [[TMP3]], [[TMP5]]
+; CHECK-NEXT: [[OFFSET_IDX3:%.*]] = mul i64 [[INDEX]], [[INC]]
+; CHECK-NEXT: [[TMP7:%.*]] = mul i64 0, [[INC]]
+; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX3]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = icmp ule <2 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP9]], i32 0
+; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK: pred.store.if:
 ; CHECK-NEXT: store i32 0, i32* [[PTR:%.*]], align 4
 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
 ; CHECK: pred.store.continue:
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP11]], i32 1
-; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12]]
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP9]], i32 1
+; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5]]
 ; CHECK: pred.store.if4:
 ; CHECK-NEXT: store i32 0, i32* [[PTR]], align 4
-; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]]
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE5]]
 ; CHECK: pred.store.continue5:
-; CHECK-NEXT: [[TMP14:%.*]] = add i8 [[TMP10]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = add i8 [[TMP6]], 1
 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: br i1 true, label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK: scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll b/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll
--- a/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll
@@ -17,11 +17,11 @@
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE4:%.*]] ]
[[PRED_LOAD_CONTINUE4:%.*]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[PRED_LOAD_CONTINUE4]] ] -; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[INDEX]] to i16 -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i16 41, [[TMP0]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT1]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = add <2 x i32> [[BROADCAST_SPLAT2]], +; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[INDEX]] to i16 +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i16 41, [[TMP0]] ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <2 x i32> [[VEC_IV]], ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll --- a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll +++ b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll @@ -6,9 +6,9 @@ ; PR15882 ; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] -; CHECK: %offset.idx = sub i64 %startval, %index -; CHECK: %[[a0:.+]] = add i64 %offset.idx, 0 -; CHECK: %[[a4:.+]] = add i64 %offset.idx, -4 +; CHECK: %offset.idx3 = sub i64 %startval, %index +; CHECK: %[[a0:.+]] = add i64 %offset.idx3, 0 +; CHECK: %[[a4:.+]] = add i64 %offset.idx3, -4 define i32 @reverse_induction_i64(i64 %startval, i32 * %ptr) { entry: @@ -32,9 +32,9 @@ ; CHECK-LABEL: @reverse_induction_i128( ; CHECK: %index = phi i128 [ 0, %vector.ph ], [ %index.next, %vector.body ] -; CHECK: %offset.idx = sub i128 %startval, %index -; CHECK: %[[a0:.+]] = add i128 %offset.idx, 0 -; CHECK: %[[a4:.+]] = add i128 %offset.idx, -4 +; CHECK: %offset.idx3 = sub i128 %startval, %index +; CHECK: %[[a0:.+]] = add i128 %offset.idx3, 0 +; CHECK: %[[a4:.+]] = add i128 %offset.idx3, -4 define i32 @reverse_induction_i128(i128 %startval, i32 * %ptr) { entry: diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll --- a/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll @@ -13,18 +13,18 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = trunc i64 [[INDEX]] to i32 -; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32 -; CHECK-NEXT: [[TMP4:%.*]] = mul i32 [[TMP3]], [[X]] -; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* [[TMP6]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast float* [[TMP7]] to <4 x float>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP8]], align 4 -; CHECK-NEXT: [[TMP11:%.*]] = mul i32 [[TMP10]], [[X]] +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDEX]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: 
[[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], [[X]] +; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast float* [[TMP9]] to <4 x float>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP10]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = mul i32 [[TMP2]], [[X]] ; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, float* [[TMP13]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/select-reduction.ll b/llvm/test/Transforms/LoopVectorize/select-reduction.ll --- a/llvm/test/Transforms/LoopVectorize/select-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/select-reduction.ll @@ -25,11 +25,11 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[EXTRA_ITER]], [[INDEX]] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT3]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i64> [[BROADCAST_SPLAT4]], +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[EXTRA_ITER]], [[INDEX]] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[VEC_PHI]], ; CHECK-NEXT: [[TMP3]] = select <4 x i1> [[TMP2]], <4 x i32> [[VEC_PHI]], <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]] diff --git a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll --- a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll +++ b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll @@ -35,7 +35,7 @@ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 32, 32 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -60,7 +60,7 @@ ; CHECK-NEXT: store i16 [[RES]], i16* [[DST_PTR]], align 2 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[CMP439:%.*]] = icmp ult i64 [[IV]], 31 -; CHECK-NEXT: br i1 [[CMP439]], label [[LOOP_HEADER]], label [[EXIT]], [[LOOP2:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[CMP439]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -130,7 +130,7 @@ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 
[[INDEX_NEXT]], 32 -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 32, 32 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -156,7 +156,7 @@ ; CHECK-NEXT: store i16 [[RES]], i16* [[DST_PTR]], align 2 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[CMP439:%.*]] = icmp ult i64 [[IV]], 31 -; CHECK-NEXT: br i1 [[CMP439]], label [[LOOP_HEADER]], label [[EXIT]], [[LOOP5:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[CMP439]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -220,7 +220,7 @@ ; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <2 x i16> [[VEC_IND1]], ; CHECK-NEXT: [[VEC_IND_NEXT4]] = add <2 x i16> [[VEC_IND3]], ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 32, 32 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -241,7 +241,7 @@ ; CHECK-NEXT: [[LV:%.*]] = load i16, i16* [[SRC_PTR]], align 1 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[CMP439:%.*]] = icmp ult i64 [[IV]], 31 -; CHECK-NEXT: br i1 [[CMP439]], label [[LOOP_HEADER]], label [[EXIT]], [[LOOP7:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[CMP439]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -322,7 +322,7 @@ ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <2 x i16> [[VEC_IND1]], ; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 -; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, 64 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -348,7 +348,7 @@ ; CHECK-NEXT: store i16 [[RES]], i16* [[DST_PTR]], align 2 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[CMP439:%.*]] = icmp ult i64 [[IV]], 63 -; CHECK-NEXT: br i1 [[CMP439]], label [[LOOP_HEADER]], label [[EXIT]], [[LOOP9:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[CMP439]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -404,7 +404,7 @@ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP10:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 1000, 1000 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -421,7 +421,7 @@ ; CHECK-NEXT: store i32 [[P]], i32* [[GEP_PTR]], align 4 ; CHECK-NEXT: [[ADD_I]] = add nsw i32 [[P]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[ADD_I]], 1000 -; 
CHECK-NEXT: br i1 [[CMP]], label [[LOOP_HEADER]], label [[EXIT]], [[LOOP11:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll --- a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll @@ -10,11 +10,11 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDEX]] to i16 ; CHECK-NEXT: [[TMP2:%.*]] = add i16 [[TMP1]], 0 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT1]], <4 x i16> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i1> [[MASK1]], ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[MASK1]], <4 x i16> [[BROADCAST_SPLAT2]], <4 x i16> undef ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i16> [[PREDPHI]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll b/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll --- a/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll +++ b/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll @@ -26,10 +26,10 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[PTR]], i32 [[TMP0]] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[SIZE]], [[INDEX]] -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[PTR]], i32 [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <4 x i8>* @@ -39,7 +39,7 @@ ; CHECK-NEXT: store <4 x i8> [[WIDE_LOAD]], <4 x i8>* [[TMP6]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[SIZE]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]] @@ -55,7 +55,7 @@ ; CHECK-NEXT: [[TMP8:%.*]] = load i8, i8* [[INCDEC_PTR]], align 1 ; CHECK-NEXT: store i8 [[TMP8]], i8* [[BUFF]], align 1 ; CHECK-NEXT: [[TOBOOL11:%.*]] = icmp eq i32 [[DEC]], 0 -; CHECK-NEXT: br i1 [[TOBOOL11]], label [[END]], label [[BODY]], !llvm.loop !2 +; CHECK-NEXT: br i1 [[TOBOOL11]], label [[END]], label [[BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: end: ; CHECK-NEXT: [[INCDEC_PTR_LCSSA:%.*]] = phi i8* [ [[INCDEC_PTR]], [[BODY]] ], [ [[IND_END2]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: store i8* [[INCDEC_PTR_LCSSA]], i8** [[POS]], 
align 4 @@ -94,10 +94,10 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[PTR]], i32 [[TMP0]] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[SIZE]], [[INDEX]] -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[PTR]], i32 [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <4 x i8>* @@ -107,7 +107,7 @@ ; CHECK-NEXT: store <4 x i8> [[WIDE_LOAD]], <4 x i8>* [[TMP6]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !4 +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[SIZE]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]] @@ -123,7 +123,7 @@ ; CHECK-NEXT: [[TMP8:%.*]] = load i8, i8* [[INCDEC_PTR]], align 1 ; CHECK-NEXT: store i8 [[TMP8]], i8* [[BUFF]], align 1 ; CHECK-NEXT: [[TOBOOL11:%.*]] = icmp eq i32 [[DEC]], 0 -; CHECK-NEXT: br i1 [[TOBOOL11]], label [[END]], label [[BODY]], !llvm.loop !5 +; CHECK-NEXT: br i1 [[TOBOOL11]], label [[END]], label [[BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: end: ; CHECK-NEXT: [[INCDEC_PTR_LCSSA:%.*]] = phi i8* [ [[INCDEC_PTR]], [[BODY]] ], [ [[IND_END2]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: store i8* [[INCDEC_PTR_LCSSA]], i8** [[POS]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll @@ -13,7 +13,7 @@ ; CHECK-NEXT: N1 [label = ; CHECK-NEXT: "for.body:\l" + ; CHECK-NEXT: " EMIT vp\<[[CAN_IV:%.+]]\> = CANONICAL-INDUCTION\l" + -; CHECK-NEXT: " WIDEN-INDUCTION %iv = phi %iv.next, 0\l" + +; CHECK-NEXT: " SCALAR-STEPS vp\<[[CAN_IV]]\>, ir\<1\>\l" + ; CHECK-NEXT: " CLONE ir\<%arrayidx\> = getelementptr ir\<%y\>, ir\<%iv\>\l" + ; CHECK-NEXT: " WIDEN ir\<%lv\> = load ir\<%arrayidx\>\l" + ; CHECK-NEXT: " WIDEN-CALL ir\<%call\> = call @llvm.sqrt.f32(ir\<%lv\>)\l" + diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -12,7 +12,7 @@ ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: for.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION -; CHECK-NEXT: WIDEN-INDUCTION %iv = phi %iv.next, 0 +; CHECK-NEXT: SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr ir<%y>, ir<%iv> ; CHECK-NEXT: WIDEN ir<%lv> = load ir<%arrayidx> ; CHECK-NEXT: WIDEN-CALL ir<%call> = call @llvm.sqrt.f32(ir<%lv>) @@ -90,8 +90,8 @@ ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: for.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION -; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 
%iv.next, 0 ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi ir<0.000000e+00>, ir<%red.next> +; CHECK-NEXT: SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr ir<%y>, ir<%iv> ; CHECK-NEXT: WIDEN ir<%lv> = load ir<%arrayidx> ; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + fast reduce.fadd (ir<%lv>) @@ -194,7 +194,7 @@ ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: for.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION -; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next +; CHECK-NEXT: SCALAR-STEPS vp<[[CAN_IV]]>, ir<4> ; CHECK-NEXT: CLONE ir<%gep.AB.0> = getelementptr ir<@AB>, ir<0>, ir<%iv> ; CHECK-NEXT: INTERLEAVE-GROUP with factor 4 at %AB.0, ir<%gep.AB.0> ; CHECK-NEXT: ir<%AB.0> = load from index 0 @@ -258,8 +258,8 @@ ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: for.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION -; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%sum.07> = phi ir<0.000000e+00>, ir<%muladd> +; CHECK-NEXT: SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr ir<%a>, ir<%iv> ; CHECK-NEXT: WIDEN ir<%l.a> = load ir<%arrayidx> ; CHECK-NEXT: CLONE ir<%arrayidx2> = getelementptr ir<%b>, ir<%iv> @@ -295,7 +295,7 @@ ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: loop: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION -; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next +; CHECK-NEXT: SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%isd> = getelementptr ir<%asd>, ir<%iv> ; CHECK-NEXT: WIDEN ir<%lsd> = load ir<%isd> ; CHECK-NEXT: WIDEN ir<%psd> = add ir<%lsd>, ir<23> diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -893,7 +893,7 @@ ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: loop.header: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION -; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next +; CHECK-NEXT: SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: Successor(s): loop.then ; CHECK-EMPTY: ; CHECK-NEXT: loop.then: