diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -556,6 +556,12 @@
   /// vector of instructions.
   void addMetadata(ArrayRef<Value *> To, Instruction *From);

+  /// The new Induction variable which was added to the new block.
+  PHINode *Induction = nullptr;
+
+  /// The induction variable of the old basic block.
+  PHINode *OldInduction = nullptr;
+
 protected:
   friend class LoopVectorizationPlanner;

@@ -767,12 +773,6 @@
   /// A list of all bypass blocks. The first block is the entry of the loop.
   SmallVector<BasicBlock *, 4> LoopBypassBlocks;

-  /// The new Induction variable which was added to the new block.
-  PHINode *Induction = nullptr;
-
-  /// The induction variable of the old basic block.
-  PHINode *OldInduction = nullptr;
-
   /// Store instructions that were predicated.
   SmallVector<Instruction *, 4> PredicatedInstructions;

@@ -2773,26 +2773,16 @@
     return;
   }

-  // Try to create a new independent vector induction variable. If we can't
+  // Create a new independent vector induction variable. If we can't
   // create the phi node, we will splat the scalar induction variable in each
   // loop iteration.
-  if (!shouldScalarizeInstruction(EntryVal)) {
-    createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, State);
-    Value *ScalarIV = CreateScalarIV(Step);
-    // Create scalar steps that can be used by instructions we will later
-    // scalarize. Note that the addition of the scalar steps will not increase
-    // the number of instructions in the loop in the common case prior to
-    // InstCombine. We will be trading one vector extract for each scalar step.
-    buildScalarSteps(ScalarIV, Step, EntryVal, ID, Def, State);
-    return;
-  }
-
-  // All IV users are scalar instructions, so only emit a scalar IV, not a
-  // vectorised IV. Except when we tail-fold, then the splat IV feeds the
-  // predicate used by the masked loads/stores.
+  assert(!shouldScalarizeInstruction(EntryVal));
+  createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, State);
   Value *ScalarIV = CreateScalarIV(Step);
-  if (!Cost->isScalarEpilogueAllowed())
-    CreateSplatIV(ScalarIV, Step);
+  // Create scalar steps that can be used by instructions we will later
+  // scalarize. Note that the addition of the scalar steps will not increase
+  // the number of instructions in the loop in the common case prior to
+  // InstCombine. We will be trading one vector extract for each scalar step.
   buildScalarSteps(ScalarIV, Step, EntryVal, ID, Def, State);
 }
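To make the trade-off in the comment above concrete, here is a minimal hand-written sketch (VF=4, UF=1, unit step; not taken from this patch's tests) of the scalar steps that buildScalarSteps emits instead of per-lane extracts from a widened IV:

```llvm
; Per-lane IV values for scalarized users are scalar adds off the
; canonical induction variable, replacing one
;   extractelement <4 x i64> %vec.iv, i32 <lane>
; per lane with one add per lane.
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%iv.0 = add i64 %index, 0
%iv.1 = add i64 %index, 1
%iv.2 = add i64 %index, 2
%iv.3 = add i64 %index, 3
```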
@@ -7970,10 +7960,14 @@
   // Perform the actual loop transformation.

   // 1. Create a new empty loop. Unlink the old loop and connect the new one.
-  VPTransformState State{BestVF, BestUF, LI, DT, ILV.Builder, &ILV, &BestVPlan};
+  VPTransformState State{BestVF, BestUF, LI, DT,
+                         PSE.getSE(), ILV.Builder, &ILV, &BestVPlan,
+                         ILV.Induction, ILV.OldInduction};
   State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton();
   State.TripCount = ILV.getOrCreateTripCount(nullptr);
   State.CanonicalIV = ILV.Induction;
+  State.Induction = ILV.Induction;
+  State.OldInduction = ILV.OldInduction;

   ILV.collectPoisonGeneratingRecipes(State);

   ILV.printDebugTracesAtStart();
@@ -8469,6 +8463,11 @@
   VPValue *BTC = Plan->getOrCreateBackedgeTakenCount();
   bool TailFolded = !CM.isScalarEpilogueAllowed();

+  while (NewInsertionPoint != Builder.getInsertBlock()->end() &&
+         isa<VPScalarIVStepsRecipe>(*NewInsertionPoint))
+    ++NewInsertionPoint;
+  Builder.setInsertPoint(Builder.getInsertBlock(), NewInsertionPoint);
+
   if (TailFolded && CM.TTI.emitGetActiveLaneMask()) {
     // While ActiveLaneMask is a binary op that consumes the loop tripcount
     // as a second argument, we only pass the IV here and extract the
@@ -8558,7 +8557,7 @@
   return nullptr;
 }

-VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionTruncate(
+VPRecipeBase *VPRecipeBuilder::tryToOptimizeInductionTruncate(
     TruncInst *I, ArrayRef<VPValue *> Operands, VFRange &Range, VPlan &Plan) const {
   // Optimize the special case where the source is a constant integer
@@ -8577,9 +8576,13 @@
   if (LoopVectorizationPlanner::getDecisionAndClampRange(
           isOptimizableIVTruncate(I), Range)) {
-    auto *Phi = cast<PHINode>(I->getOperand(0));
     const InductionDescriptor &II = *Legal->getIntOrFpInductionDescriptor(Phi);
+
+    if (onlyScalarStepsNeeded(I, Range))
+      return new VPScalarIVStepsRecipe(Phi, II, !CM.isScalarEpilogueAllowed(),
+                                       I);
+
     VPValue *Start = Plan.getOrAddVPValue(II.getStartValue());
     return new VPWidenIntOrFpInductionRecipe(Phi, Start, II, I);
   }
@@ -8916,6 +8919,46 @@
   return toVPRecipeResult(tryToWiden(Instr, Operands));
 }

+bool VPRecipeBuilder::onlyScalarStepsNeeded(Instruction *Instr,
+                                            VFRange &Range) const {
+  auto ShouldScalarizeInstruction = [this](Instruction *I, ElementCount VF) {
+    return CM.isScalarAfterVectorization(I, VF) ||
+           CM.isProfitableToScalarize(I, VF);
+  };
+
+  auto NeedsScalarInduction = [&](ElementCount VF) {
+    if (ShouldScalarizeInstruction(Instr, VF))
+      return true;
+    auto IsScalarInst = [&](User *U) -> bool {
+      auto *I = cast<Instruction>(U);
+      return (OrigLoop->contains(I) && ShouldScalarizeInstruction(I, VF));
+    };
+    return llvm::any_of(Instr->users(), IsScalarInst);
+  };
+
+  return LoopVectorizationPlanner::getDecisionAndClampRange(
+      [&](ElementCount VF) {
+        return VF.isVector() && ShouldScalarizeInstruction(Instr, VF) &&
+               NeedsScalarInduction(VF);
+      },
+      Range);
+}
+
+VPScalarIVStepsRecipe *VPRecipeBuilder::tryToBuildScalarSteps(
+    Instruction *Instr, ArrayRef<VPValue *> Operands, VFRange &Range) {
+  auto *Phi = dyn_cast<PHINode>(Instr);
+  if (!Phi)
+    return nullptr;
+  auto *II = Legal->getIntOrFpInductionDescriptor(Phi);
+  if (!II)
+    return nullptr;
+
+  if (onlyScalarStepsNeeded(Phi, Range))
+    return new VPScalarIVStepsRecipe(Phi, *II, !CM.isScalarEpilogueAllowed());
+
+  return nullptr;
+}
+
 void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
                                                         ElementCount MaxVF) {
   assert(OrigLoop->isInnermost() && "Inner loop expected.");
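As a rough illustration of the case onlyScalarStepsNeeded is meant to catch (a hand-written sketch with a hypothetical array %a, not from the tests): the induction is scalar after vectorization and every in-loop user of it is scalarized, so emitting a vector IV would be wasted work:

```llvm
loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  ; The only in-loop users of %iv are address computations, which remain
  ; scalar after vectorization; the widened load consumes the lane-0 address.
  %gep = getelementptr inbounds float, float* %a, i64 %iv
  %v = load float, float* %gep, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1024
  br i1 %exitcond, label %exit, label %loop
```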
@@ -9041,7 +9084,10 @@
   DFS.perform(LI);

   VPBasicBlock *VPBB = HeaderVPBB;
-  SmallVector<VPWidenIntOrFpInductionRecipe *> InductionsToMove;
+  SmallVector<VPRecipeBase *> InductionsToMove;
+  DenseMap<PHINode *, VPWidenIntOrFpInductionRecipe *> VectorIVs;
+  DenseMap<PHINode *, VPScalarIVStepsRecipe *> ScalarIVs;
+  SmallVector<VPScalarIVStepsRecipe *> MoveAfterPhis;
   for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) {
     // Relevant instructions from basic block BB will be grouped into VPRecipe
     // ingredients and fill a new VPBasicBlock.
@@ -9068,6 +9114,14 @@
         auto OpRange = Plan->mapToVPValues(Instr->operands());
         Operands = {OpRange.begin(), OpRange.end()};
       }
+
+      if (auto *Steps =
+              RecipeBuilder.tryToBuildScalarSteps(Instr, Operands, Range)) {
+        Plan->addVPValue(Instr, Steps);
+        RecipeBuilder.setRecipe(Instr, Steps);
+        MoveAfterPhis.push_back(Steps);
+        continue;
+      }

       if (auto RecipeOrValue = RecipeBuilder.tryToCreateWidenRecipe(
               Instr, Operands, Range, Plan)) {
         // If Instr can be simplified to an existing VPValue, use it.
@@ -9087,7 +9141,8 @@
           Plan->addVPValue(UV, Def);
         }

-        if (isa<VPWidenIntOrFpInductionRecipe>(Recipe) &&
+        if ((isa<VPWidenIntOrFpInductionRecipe>(Recipe) ||
+             isa<VPScalarIVStepsRecipe>(Recipe)) &&
            HeaderVPBB->getFirstNonPhi() != VPBB->end()) {
          // Keep track of VPWidenIntOrFpInductionRecipes not in the phi section
          // of the header block. That can happen for truncates of induction
          // variables. Those recipes are moved to the phi section of the header
          // block after applying SinkAfter, which relies on the original
          // position of the trunc.
           assert(isa<TruncInst>(Instr));
-          InductionsToMove.push_back(
-              cast<VPWidenIntOrFpInductionRecipe>(Recipe));
+          InductionsToMove.push_back(Recipe);
         }
         RecipeBuilder.setRecipe(Instr, Recipe);
         VPBB->appendRecipe(Recipe);
@@ -9119,6 +9173,10 @@
     VPBB = NextVPBBForBB;
   }

+  for (auto *R : MoveAfterPhis) {
+    R->insertBefore(*HeaderVPBB, HeaderVPBB->getFirstNonPhi());
+  }
+
   // Fold the last, empty block into its predecessor.
   VPBasicBlock *PrevVPBB = cast<VPBasicBlock>(VPBB->getSinglePredecessor());
   bool Folded = VPBlockUtils::tryToMergeBlockIntoPredecessor(VPBB);
@@ -9204,7 +9262,7 @@
   // Now that sink-after is done, move induction recipes for optimized truncates
   // to the phi section of the header block.
-  for (VPWidenIntOrFpInductionRecipe *Ind : InductionsToMove)
+  for (VPRecipeBase *Ind : InductionsToMove)
     Ind->moveBefore(*HeaderVPBB, HeaderVPBB->getFirstNonPhi());

   // Adjust the recipes for any inloop reductions.
@@ -9690,6 +9748,72 @@
       getTruncInst(), getVPValue(0), State);
 }

+void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
+  const DataLayout &DL = IV->getModule()->getDataLayout();
+  // Generate code for the induction step. Note that induction steps are
+  // required to be loop-invariant.
+  auto CreateStepValue = [&](const SCEV *Step) -> Value * {
+    if (State.SE->isSCEVable(IV->getType())) {
+      SCEVExpander Exp(*State.SE, DL, "induction");
+      return Exp.expandCodeFor(Step, Step->getType(),
+                               State.CFG.VectorPreHeader->getTerminator());
+    }
+    return cast<SCEVConstant>(Step)->getValue();
+  };
+
+  Value *Step = CreateStepValue(IndDesc.getStep());
+  auto CreateScalarIV = [&](Value *&Step) -> Value * {
+    Value *ScalarIV = State.Induction;
+    if (IV != State.OldInduction) {
+      ScalarIV =
+          IV->getType()->isIntegerTy()
+              ? State.Builder.CreateSExtOrTrunc(State.Induction, IV->getType())
+              : State.Builder.CreateCast(Instruction::SIToFP, State.Induction,
+                                         IV->getType());
+      ScalarIV = emitTransformedIndex(State.Builder, ScalarIV, State.SE, DL,
+                                      IndDesc, *State.LI, State.CFG.PrevBB);
+      ScalarIV->setName("offset.idx");
+    }
+    if (Trunc) {
+      auto *TruncType = cast<IntegerType>(Trunc->getType());
+      assert(Step->getType()->isIntegerTy() &&
+             "Truncation requires an integer step");
+      ScalarIV = State.Builder.CreateTrunc(ScalarIV, TruncType);
+      Step = State.Builder.CreateTrunc(Step, TruncType);
+    }
+    return ScalarIV;
+  };
+
+  // Create the vector values from the scalar IV, in the absence of creating a
+  // vector IV.
+  auto CreateSplatIV = [&](Value *ScalarIV, Value *Step) {
+    Value *Broadcasted =
+        State.Builder.CreateVectorSplat(State.VF, ScalarIV, "broadcast");
+    for (unsigned Part = 0; Part < State.UF; ++Part) {
+      assert(!State.VF.isScalable() && "scalable vectors not yet supported.");
+      Value *StartIdx;
+      if (Step->getType()->isFloatingPointTy())
+        StartIdx = getRuntimeVFAsFloat(State.Builder, Step->getType(),
+                                       State.VF * Part);
+      else
+        StartIdx =
+            getRuntimeVF(State.Builder, Step->getType(), State.VF * Part);
+
+      Value *EntryPart =
+          getStepVector(Broadcasted, StartIdx, Step,
+                        IndDesc.getInductionOpcode(), State.VF, State.Builder);
+      State.set(this, EntryPart, Part);
+      if (Trunc)
+        State.ILV->addMetadata(EntryPart, Trunc);
+    }
+  };
+
+  Value *ScalarIV = CreateScalarIV(Step);
+  if (NeedSplatIV)
+    CreateSplatIV(ScalarIV, Step);
+  buildScalarSteps(ScalarIV, Step, IV, IndDesc, this, State);
+}
+
 void VPWidenPHIRecipe::execute(VPTransformState &State) {
   State.ILV->widenPHIInstruction(cast<PHINode>(getUnderlyingValue()), this,
                                  State);
@@ -10097,7 +10221,8 @@
     // Check if there is a scalar value for the selected lane.
     if (!hasScalarValue(Def, {Part, LastLane})) {
       // At the moment, VPWidenIntOrFpInductionRecipes can also be uniform.
-      assert(isa<VPWidenIntOrFpInductionRecipe>(Def->getDef()) &&
+      assert((isa<VPWidenIntOrFpInductionRecipe>(Def->getDef()) ||
+              isa<VPScalarIVStepsRecipe>(Def->getDef())) &&
             "unexpected recipe found to be invariant");
      IsUniform = true;
      LastLane = 0;
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -79,9 +79,10 @@
   /// Optimize the special case where the operand of \p I is a constant integer
   /// induction variable.
-  VPWidenIntOrFpInductionRecipe *
-  tryToOptimizeInductionTruncate(TruncInst *I, ArrayRef<VPValue *> Operands,
-                                 VFRange &Range, VPlan &Plan) const;
+  VPRecipeBase *tryToOptimizeInductionTruncate(TruncInst *I,
+                                               ArrayRef<VPValue *> Operands,
+                                               VFRange &Range,
+                                               VPlan &Plan) const;

   /// Handle non-loop phi nodes. Return a VPValue, if all incoming values match
   /// or a new VPBlendRecipe otherwise. Currently all such phi nodes are turned
@@ -104,6 +105,8 @@
   /// Return a VPRecipeOrValueTy with VPRecipeBase * being set. This can be
   /// used to force the use as VPRecipeBase* for recipe sub-types that also
   /// inherit from VPValue.
   VPRecipeOrVPValueTy toVPRecipeResult(VPRecipeBase *R) const { return R; }

+  bool onlyScalarStepsNeeded(Instruction *I, VFRange &Range) const;
+
 public:
   VPRecipeBuilder(Loop *OrigLoop, const TargetLibraryInfo *TLI,
                   LoopVectorizationLegality *Legal,
@@ -171,6 +174,9 @@
                                         Instruction *I, VFRange &Range,
                                         VPBasicBlock *VPBB, VPlanPtr &Plan);

+  VPScalarIVStepsRecipe *tryToBuildScalarSteps(Instruction *Instr,
+                                               ArrayRef<VPValue *> Operands,
+                                               VFRange &Range);
   /// Add the incoming values from the backedge to reduction & first-order
   /// recurrence cross-iteration phis.
   void fixHeaderPhis();
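When NeedSplatIV is set (tail folding, where no scalar epilogue is allowed), CreateSplatIV in the execute implementation above broadcasts the scalar IV and adds a step vector so the loop predicate can still be computed. Roughly, for VF=4, UF=1 and an i64 IV with unit step (hand-written sketch; %btc.splat stands for the splatted backedge-taken count and is an assumed name, not one the patch introduces):

```llvm
%broadcast.splatinsert = insertelement <4 x i64> poison, i64 %index, i32 0
%broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, <4 x i32> zeroinitializer
%vec.iv = add <4 x i64> %broadcast.splat, <i64 0, i64 1, i64 2, i64 3>
; the splat IV feeds the predicate used by the masked loads/stores:
%active.mask = icmp ule <4 x i64> %vec.iv, %btc.splat
```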
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -196,10 +196,12 @@
 /// needed for generating the output IR.
 struct VPTransformState {
-  VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI,
-                   DominatorTree *DT, IRBuilder<> &Builder,
-                   InnerLoopVectorizer *ILV, VPlan *Plan)
-      : VF(VF), UF(UF), Instance(), LI(LI), DT(DT), Builder(Builder), ILV(ILV),
-        Plan(Plan) {}
+  VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI,
+                   DominatorTree *DT, ScalarEvolution *SE, IRBuilder<> &Builder,
+                   InnerLoopVectorizer *ILV, VPlan *Plan,
+                   Instruction *Induction, Instruction *OldInduction)
+      : VF(VF), UF(UF), Instance(), LI(LI), DT(DT), SE(SE), Builder(Builder),
+        ILV(ILV), Plan(Plan), OldInduction(OldInduction), Induction(Induction) {
+  }

   /// The chosen Vectorization and Unroll Factors of the loop being vectorized.
   ElementCount VF;
@@ -333,6 +335,8 @@
   /// Hold a pointer to Dominator Tree to register new basic blocks in the loop.
   DominatorTree *DT;

+  ScalarEvolution *SE;
+
   /// Hold a reference to the IRBuilder used to generate output IR code.
   IRBuilder<> &Builder;

@@ -353,6 +357,9 @@
   /// Holds recipes that may generate a poison value that is used after
   /// vectorization, even when their operands are not poison.
   SmallPtrSet<VPRecipeBase *, 16> MayGeneratePoisonRecipes;
+
+  Instruction *OldInduction;
+  Instruction *Induction;
 };

 /// VPUsers instance used by VPBlockBase to manage CondBit and the block
@@ -699,6 +706,7 @@
   /// Insert an unlinked recipe into a basic block immediately before
   /// the specified recipe.
   void insertBefore(VPRecipeBase *InsertPos);
+  void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);

   /// Insert an unlinked Recipe into a basic block immediately after
   /// the specified Recipe.
@@ -1059,6 +1067,44 @@
   const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
 };

+/// A recipe for handling phi nodes of integer and floating-point inductions,
+/// producing their scalar values, plus a splat vector value when tail folding
+/// requires one.
+class VPScalarIVStepsRecipe : public VPRecipeBase, public VPValue {
+  PHINode *IV;
+  const InductionDescriptor &IndDesc;
+  bool NeedSplatIV;
+  Instruction *Trunc = nullptr;
+
+public:
+  VPScalarIVStepsRecipe(PHINode *IV, const InductionDescriptor &IndDesc,
+                        bool NeedSplatIV)
+      : VPRecipeBase(VPScalarIVStepsSC, {}), VPValue(IV, this), IV(IV),
+        IndDesc(IndDesc), NeedSplatIV(NeedSplatIV) {}
+
+  VPScalarIVStepsRecipe(PHINode *IV, const InductionDescriptor &IndDesc,
+                        bool NeedSplatIV, Instruction *Trunc)
+      : VPRecipeBase(VPScalarIVStepsSC, {}), VPValue(Trunc, this), IV(IV),
+        IndDesc(IndDesc), NeedSplatIV(NeedSplatIV), Trunc(Trunc) {}
+
+  ~VPScalarIVStepsRecipe() override = default;
+
+  /// Method to support type inquiry through isa, cast, and dyn_cast.
+  static inline bool classof(const VPDef *D) {
+    return D->getVPDefID() == VPRecipeBase::VPScalarIVStepsSC;
+  }
+
+  /// Generate the scalarized versions of the phi node as needed by their
+  /// users, and the splat vector value if NeedSplatIV is set.
+  void execute(VPTransformState &State) override;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  /// Print the recipe.
+  void print(raw_ostream &O, const Twine &Indent,
+             VPSlotTracker &SlotTracker) const override;
+#endif
+};
+
 /// A recipe for handling first order recurrences and pointer inductions. For
 /// first-order recurrences, the start value is the first operand of the recipe
 /// and the incoming value from the backedge is the second operand. It also
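For the truncated form (the second constructor above, with Trunc set), both the transformed scalar IV and the step are truncated before the per-lane adds, which is the offset.idx/trunc pattern visible in the test updates below. A hand-written sketch for an i64 canonical IV whose users consume i32, at VF=2:

```llvm
%offset.idx = trunc i64 %index to i32
%t0 = add i32 %offset.idx, 0
%t1 = add i32 %offset.idx, 1
```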
It also diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -630,14 +630,19 @@ insertAfter(InsertPos); } -void VPRecipeBase::moveBefore(VPBasicBlock &BB, - iplist::iterator I) { +void VPRecipeBase::insertBefore(VPBasicBlock &BB, + iplist::iterator I) { assert(I == BB.end() || I->getParent() == &BB); - removeFromParent(); Parent = &BB; BB.getRecipeList().insert(I, this); } +void VPRecipeBase::moveBefore(VPBasicBlock &BB, + iplist::iterator I) { + removeFromParent(); + insertBefore(BB, I); +} + void VPInstruction::generateInstruction(VPTransformState &State, unsigned Part) { IRBuilder<> &Builder = State.Builder; @@ -1156,6 +1161,11 @@ O << " " << VPlanIngredient(IV); } +void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "SCALAR-STEPS " << VPlanIngredient(IV); +} + void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { O << Indent << "WIDEN-GEP "; diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -329,6 +329,7 @@ VPWidenMemoryInstructionSC, VPWidenSC, VPWidenSelectSC, + VPScalarIVStepsSC, // Phi-like recipes. Need to be kept together. VPBlendSC, diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll @@ -10,9 +10,9 @@ ; CHECK: VPlan 'Initial VPlan for VF={vscale x 2},UF>=1' { ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: loop.body: -; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: WIDEN-PHI %ptr.iv.1 = phi %start.1, %ptr.iv.1.next ; CHECK-NEXT: WIDEN-PHI %ptr.iv.2 = phi %start.2, %ptr.iv.2.next +; CHECK-NEXT: SCALAR-STEPS %iv = phi 0, %iv.next ; CHECK-NEXT: WIDEN-GEP Var[Inv] ir<%ptr.iv.2.next> = getelementptr ir<%ptr.iv.2>, ir<1> ; CHECK-NEXT: WIDEN store ir<%ptr.iv.1>, ir<%ptr.iv.2.next> ; CHECK-NEXT: WIDEN ir<%lv> = load ir<%ptr.iv.2> @@ -44,8 +44,7 @@ ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i8* [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8*, i8** [[START_1]], i64 [[TMP5]] +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8*, i8** [[START_1]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 2 ; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 1 @@ -57,6 +56,7 @@ ; CHECK-NEXT: [[TMP12:%.*]] = add [[DOTSPLAT]], [[TMP11]] ; CHECK-NEXT: [[VECTOR_GEP:%.*]] = mul [[TMP12]], shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* [[POINTER_PHI]], [[VECTOR_GEP]] +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, [[TMP13]], i64 1 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8*, i8** [[NEXT_GEP]], i32 0 ; CHECK-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to * diff --git 
--- a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-scalar-epilogue-fallback.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-scalar-epilogue-fallback.ll
@@ -23,10 +23,10 @@
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[PTR]], i32 [[TMP0]]
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[SIZE]], [[INDEX]]
-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 0
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[PTR]], i32 [[TMP1]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 0
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP]], i32 1
 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 0
 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <16 x i8>*
@@ -36,7 +36,7 @@
 ; CHECK-NEXT: store <16 x i8> [[WIDE_LOAD]], <16 x i8>* [[TMP6]], align 1
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
+; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[SIZE]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]]
@@ -52,7 +52,7 @@
 ; CHECK-NEXT: [[TMP8:%.*]] = load i8, i8* [[INCDEC_PTR]], align 1
 ; CHECK-NEXT: store i8 [[TMP8]], i8* [[BUFF]], align 1
 ; CHECK-NEXT: [[TOBOOL11:%.*]] = icmp eq i32 [[DEC]], 0
-; CHECK-NEXT: br i1 [[TOBOOL11]], label [[END]], label [[BODY]], !llvm.loop !2
+; CHECK-NEXT: br i1 [[TOBOOL11]], label [[END]], label [[BODY]], !llvm.loop [[LOOP2:![0-9]+]]
 ; CHECK: end:
 ; CHECK-NEXT: [[INCDEC_PTR_LCSSA:%.*]] = phi i8* [ [[INCDEC_PTR]], [[BODY]] ], [ [[IND_END2]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT: store i8* [[INCDEC_PTR_LCSSA]], i8** [[POS]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
--- a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
@@ -147,27 +147,27 @@
 ; CHECK-NEXT: br label [[VECTOR_BODY9:%.*]]
 ; CHECK: vector.body9:
 ; CHECK-NEXT: [[INDEX14:%.*]] = phi i64 [ 0, [[VECTOR_PH10]] ], [ [[INDEX_NEXT15:%.*]], [[PRED_STORE_CONTINUE37:%.*]] ]
-; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[I_0_LCSSA]], [[INDEX14]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT28:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX14]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT29:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT28]], <4 x i64> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[VEC_IV:%.*]] = or <4 x i64> [[BROADCAST_SPLAT29]], <i64 0, i64 1, i64 2, i64 3>
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT22:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX14]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT23:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT22]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[VEC_IV:%.*]] = or <4 x i64> [[BROADCAST_SPLAT23]], <i64 0, i64 1, i64 2, i64 3>
+; CHECK-NEXT: [[OFFSET_IDX26:%.*]] = add i64 [[I_0_LCSSA]], [[INDEX14]]
; CHECK-NEXT:
[[TMP20:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT21]] ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP20]], i64 0 ; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_STORE_IF30:%.*]], label [[PRED_STORE_CONTINUE31:%.*]] ; CHECK: pred.store.if30: -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[OFFSET_IDX26]] ; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[OFFSET_IDX26]] ; CHECK-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 ; CHECK-NEXT: [[TMP26:%.*]] = and i32 [[TMP25]], [[TMP23]] -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[OFFSET_IDX26]] ; CHECK-NEXT: store i32 [[TMP26]], i32* [[TMP27]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE31]] ; CHECK: pred.store.continue31: ; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP20]], i64 1 ; CHECK-NEXT: br i1 [[TMP28]], label [[PRED_STORE_IF32:%.*]], label [[PRED_STORE_CONTINUE33:%.*]] ; CHECK: pred.store.if32: -; CHECK-NEXT: [[TMP29:%.*]] = add i64 [[OFFSET_IDX]], 1 +; CHECK-NEXT: [[TMP29:%.*]] = add i64 [[OFFSET_IDX26]], 1 ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[TMP29]] ; CHECK-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 ; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[TMP29]] @@ -180,7 +180,7 @@ ; CHECK-NEXT: [[TMP36:%.*]] = extractelement <4 x i1> [[TMP20]], i64 2 ; CHECK-NEXT: br i1 [[TMP36]], label [[PRED_STORE_IF34:%.*]], label [[PRED_STORE_CONTINUE35:%.*]] ; CHECK: pred.store.if34: -; CHECK-NEXT: [[TMP37:%.*]] = add i64 [[OFFSET_IDX]], 2 +; CHECK-NEXT: [[TMP37:%.*]] = add i64 [[OFFSET_IDX26]], 2 ; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[TMP37]] ; CHECK-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 ; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[TMP37]] @@ -193,7 +193,7 @@ ; CHECK-NEXT: [[TMP44:%.*]] = extractelement <4 x i1> [[TMP20]], i64 3 ; CHECK-NEXT: br i1 [[TMP44]], label [[PRED_STORE_IF36:%.*]], label [[PRED_STORE_CONTINUE37]] ; CHECK: pred.store.if36: -; CHECK-NEXT: [[TMP45:%.*]] = add i64 [[OFFSET_IDX]], 3 +; CHECK-NEXT: [[TMP45:%.*]] = add i64 [[OFFSET_IDX26]], 3 ; CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[TMP45]] ; CHECK-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 ; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[TMP45]] @@ -277,16 +277,16 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE21:%.*]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT14:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT15:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT14]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[VEC_IV:%.*]] = or <4 x i64> 
[[BROADCAST_SPLAT15]], <i64 0, i64 1, i64 2, i64 3>
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT12:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT13:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT12]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[VEC_IV:%.*]] = or <4 x i64> [[BROADCAST_SPLAT13]], <i64 0, i64 1, i64 2, i64 3>
 ; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP4]], i64 0
 ; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK: pred.store.if:
 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[NEXT_GEP10:%.*]] = getelementptr i32, i32* [[Q:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[NEXT_GEP10]], align 16
+; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i32, i32* [[Q:%.*]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[NEXT_GEP8]], align 16
 ; CHECK-NEXT: store i32 [[TMP6]], i32* [[NEXT_GEP]], align 16
 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
 ; CHECK: pred.store.continue:
@@ -294,33 +294,33 @@
 ; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF16:%.*]], label [[PRED_STORE_CONTINUE17:%.*]]
 ; CHECK: pred.store.if16:
 ; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[INDEX]], 1
-; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP8]]
+; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP8]]
 ; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[INDEX]], 1
-; CHECK-NEXT: [[NEXT_GEP11:%.*]] = getelementptr i32, i32* [[Q]], i64 [[TMP9]]
-; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[NEXT_GEP11]], align 16
-; CHECK-NEXT: store i32 [[TMP10]], i32* [[NEXT_GEP7]], align 16
+; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i32, i32* [[Q]], i64 [[TMP9]]
+; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[NEXT_GEP9]], align 16
+; CHECK-NEXT: store i32 [[TMP10]], i32* [[NEXT_GEP5]], align 16
 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE17]]
 ; CHECK: pred.store.continue17:
 ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP4]], i64 2
 ; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF18:%.*]], label [[PRED_STORE_CONTINUE19:%.*]]
 ; CHECK: pred.store.if18:
 ; CHECK-NEXT: [[TMP12:%.*]] = or i64 [[INDEX]], 2
-; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP12]]
+; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP12]]
 ; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[INDEX]], 2
-; CHECK-NEXT: [[NEXT_GEP12:%.*]] = getelementptr i32, i32* [[Q]], i64 [[TMP13]]
-; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[NEXT_GEP12]], align 16
-; CHECK-NEXT: store i32 [[TMP14]], i32* [[NEXT_GEP8]], align 16
+; CHECK-NEXT: [[NEXT_GEP10:%.*]] = getelementptr i32, i32* [[Q]], i64 [[TMP13]]
+; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[NEXT_GEP10]], align 16
+; CHECK-NEXT: store i32 [[TMP14]], i32* [[NEXT_GEP6]], align 16
 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE19]]
 ; CHECK: pred.store.continue19:
 ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP4]], i64 3
 ; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF20:%.*]], label [[PRED_STORE_CONTINUE21]]
 ; CHECK: pred.store.if20:
 ; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[INDEX]], 3
-; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP16]]
+; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP16]]
 ; CHECK-NEXT: [[TMP17:%.*]] = or i64 [[INDEX]], 3
-; CHECK-NEXT: [[NEXT_GEP13:%.*]] = getelementptr i32, i32* [[Q]], i64 [[TMP17]]
-; CHECK-NEXT:
[[TMP18:%.*]] = load i32, i32* [[NEXT_GEP13]], align 16 -; CHECK-NEXT: store i32 [[TMP18]], i32* [[NEXT_GEP9]], align 16 +; CHECK-NEXT: [[NEXT_GEP11:%.*]] = getelementptr i32, i32* [[Q]], i64 [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* [[NEXT_GEP11]], align 16 +; CHECK-NEXT: store i32 [[TMP18]], i32* [[NEXT_GEP7]], align 16 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE21]] ; CHECK: pred.store.continue21: ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -157,8 +157,8 @@ ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: loop: ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next> -; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%and.red> = phi ir<1234>, ir<%and.red.next> +; CHECK-NEXT: SCALAR-STEPS %iv = phi 0, %iv.next ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: Successor(s): loop.0 ; CHECK-EMPTY: diff --git a/llvm/test/Transforms/LoopVectorize/float-induction.ll b/llvm/test/Transforms/LoopVectorize/float-induction.ll --- a/llvm/test/Transforms/LoopVectorize/float-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/float-induction.ll @@ -1471,8 +1471,8 @@ ; VEC4_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC4_INTERL2: vector.body: ; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE17:%.*]] ] -; VEC4_INTERL2-NEXT: [[TMP0:%.*]] = or i64 [[INDEX]], 4 -; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = sitofp i64 [[INDEX]] to float +; VEC4_INTERL2-NEXT: [[TMP0:%.*]] = sitofp i64 [[INDEX]] to float +; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = or i64 [[INDEX]], 4 ; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] ; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <4 x float>* ; VEC4_INTERL2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 @@ -1485,13 +1485,13 @@ ; VEC4_INTERL2-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; VEC4_INTERL2: pred.store.if: ; VEC4_INTERL2-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX]] -; VEC4_INTERL2-NEXT: store float [[TMP1]], float* [[TMP9]], align 4 +; VEC4_INTERL2-NEXT: store float [[TMP0]], float* [[TMP9]], align 4 ; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE]] ; VEC4_INTERL2: pred.store.continue: ; VEC4_INTERL2-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP6]], i64 1 ; VEC4_INTERL2-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]] ; VEC4_INTERL2: pred.store.if4: -; VEC4_INTERL2-NEXT: [[TMP11:%.*]] = fadd fast float [[TMP1]], 1.000000e+00 +; VEC4_INTERL2-NEXT: [[TMP11:%.*]] = fadd fast float [[TMP0]], 1.000000e+00 ; VEC4_INTERL2-NEXT: [[TMP12:%.*]] = or i64 [[INDEX]], 1 ; VEC4_INTERL2-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP12]] ; VEC4_INTERL2-NEXT: store float [[TMP11]], float* [[TMP13]], align 4 @@ -1500,7 +1500,7 @@ ; VEC4_INTERL2-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP6]], i64 2 ; VEC4_INTERL2-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]] ; VEC4_INTERL2: 
pred.store.if6: -; VEC4_INTERL2-NEXT: [[TMP15:%.*]] = fadd fast float [[TMP1]], 2.000000e+00 +; VEC4_INTERL2-NEXT: [[TMP15:%.*]] = fadd fast float [[TMP0]], 2.000000e+00 ; VEC4_INTERL2-NEXT: [[TMP16:%.*]] = or i64 [[INDEX]], 2 ; VEC4_INTERL2-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP16]] ; VEC4_INTERL2-NEXT: store float [[TMP15]], float* [[TMP17]], align 4 @@ -1509,7 +1509,7 @@ ; VEC4_INTERL2-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP6]], i64 3 ; VEC4_INTERL2-NEXT: br i1 [[TMP18]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9:%.*]] ; VEC4_INTERL2: pred.store.if8: -; VEC4_INTERL2-NEXT: [[TMP19:%.*]] = fadd fast float [[TMP1]], 3.000000e+00 +; VEC4_INTERL2-NEXT: [[TMP19:%.*]] = fadd fast float [[TMP0]], 3.000000e+00 ; VEC4_INTERL2-NEXT: [[TMP20:%.*]] = or i64 [[INDEX]], 3 ; VEC4_INTERL2-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP20]] ; VEC4_INTERL2-NEXT: store float [[TMP19]], float* [[TMP21]], align 4 @@ -1518,15 +1518,15 @@ ; VEC4_INTERL2-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[TMP7]], i64 0 ; VEC4_INTERL2-NEXT: br i1 [[TMP22]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]] ; VEC4_INTERL2: pred.store.if10: -; VEC4_INTERL2-NEXT: [[TMP23:%.*]] = fadd fast float [[TMP1]], 4.000000e+00 -; VEC4_INTERL2-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP0]] +; VEC4_INTERL2-NEXT: [[TMP23:%.*]] = fadd fast float [[TMP0]], 4.000000e+00 +; VEC4_INTERL2-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP1]] ; VEC4_INTERL2-NEXT: store float [[TMP23]], float* [[TMP24]], align 4 ; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE11]] ; VEC4_INTERL2: pred.store.continue11: ; VEC4_INTERL2-NEXT: [[TMP25:%.*]] = extractelement <4 x i1> [[TMP7]], i64 1 ; VEC4_INTERL2-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]] ; VEC4_INTERL2: pred.store.if12: -; VEC4_INTERL2-NEXT: [[TMP26:%.*]] = fadd fast float [[TMP1]], 5.000000e+00 +; VEC4_INTERL2-NEXT: [[TMP26:%.*]] = fadd fast float [[TMP0]], 5.000000e+00 ; VEC4_INTERL2-NEXT: [[TMP27:%.*]] = or i64 [[INDEX]], 5 ; VEC4_INTERL2-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP27]] ; VEC4_INTERL2-NEXT: store float [[TMP26]], float* [[TMP28]], align 4 @@ -1535,7 +1535,7 @@ ; VEC4_INTERL2-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP7]], i64 2 ; VEC4_INTERL2-NEXT: br i1 [[TMP29]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15:%.*]] ; VEC4_INTERL2: pred.store.if14: -; VEC4_INTERL2-NEXT: [[TMP30:%.*]] = fadd fast float [[TMP1]], 6.000000e+00 +; VEC4_INTERL2-NEXT: [[TMP30:%.*]] = fadd fast float [[TMP0]], 6.000000e+00 ; VEC4_INTERL2-NEXT: [[TMP31:%.*]] = or i64 [[INDEX]], 6 ; VEC4_INTERL2-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP31]] ; VEC4_INTERL2-NEXT: store float [[TMP30]], float* [[TMP32]], align 4 @@ -1544,7 +1544,7 @@ ; VEC4_INTERL2-NEXT: [[TMP33:%.*]] = extractelement <4 x i1> [[TMP7]], i64 3 ; VEC4_INTERL2-NEXT: br i1 [[TMP33]], label [[PRED_STORE_IF16:%.*]], label [[PRED_STORE_CONTINUE17]] ; VEC4_INTERL2: pred.store.if16: -; VEC4_INTERL2-NEXT: [[TMP34:%.*]] = fadd fast float [[TMP1]], 7.000000e+00 +; VEC4_INTERL2-NEXT: [[TMP34:%.*]] = fadd fast float [[TMP0]], 7.000000e+00 ; VEC4_INTERL2-NEXT: [[TMP35:%.*]] = or i64 [[INDEX]], 7 ; VEC4_INTERL2-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP35]] ; VEC4_INTERL2-NEXT: store float [[TMP34]], float* [[TMP36]], 
align 4 diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll --- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll @@ -548,10 +548,10 @@ ; VEC-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC: vector.body: ; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ] -; VEC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; VEC-NEXT: [[OFFSET_IDX:%.*]] = sub i64 undef, [[INDEX]] -; VEC-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VEC-NEXT: [[TMP2:%.*]] = getelementptr i8, i8* undef, i64 [[TMP0]] +; VEC-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 +; VEC-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 +; VEC-NEXT: [[TMP2:%.*]] = getelementptr i8, i8* undef, i64 [[TMP1]] ; VEC-NEXT: [[TMP3:%.*]] = getelementptr i8, i8* [[TMP2]], i32 0 ; VEC-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <2 x i8>* ; VEC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, <2 x i8>* [[TMP4]], align 1 @@ -561,7 +561,7 @@ ; VEC-NEXT: [[TMP6:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 0 ; VEC-NEXT: [[TMP7:%.*]] = zext i8 [[TMP6]] to i32 ; VEC-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8 -; VEC-NEXT: [[TMP9:%.*]] = getelementptr i8, i8* undef, i64 [[TMP0]] +; VEC-NEXT: [[TMP9:%.*]] = getelementptr i8, i8* undef, i64 [[TMP1]] ; VEC-NEXT: store i8 [[TMP8]], i8* [[TMP9]], align 1 ; VEC-NEXT: br label [[PRED_STORE_CONTINUE]] ; VEC: pred.store.continue: diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -159,9 +159,9 @@ ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 2 -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP3]] ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP4]] ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0 @@ -3826,9 +3826,9 @@ ; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = add i8 [[OFFSET_IDX]], 1 ; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = add i8 [[OFFSET_IDX]], 2 ; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = add i8 [[OFFSET_IDX]], 3 +; UNROLL-NO-IC-NEXT: [[STEP_ADD9:%.*]] = add <2 x i32> [[VEC_IND8]], ; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = add i32 [[INDEX]], 0 ; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = add i32 [[INDEX]], 2 -; UNROLL-NO-IC-NEXT: [[STEP_ADD9:%.*]] = add <2 x i32> [[VEC_IND8]], ; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i8 [[TMP20]] ; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, i32* [[A]], i8 [[TMP22]] ; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i32 0 @@ -5277,10 +5277,10 @@ ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND3:%.*]] = phi <2 x i32> [ , 
[[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], ; UNROLL-NO-IC-NEXT: [[STEP_ADD4:%.*]] = add <2 x i32> [[VEC_IND3]], +; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP0]] ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP1]] ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 0 @@ -6258,17 +6258,17 @@ ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND2:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND2]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDEX]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND2]], <2 x i32> ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32 -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDEX]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], 1 -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND2]], <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[SRC:%.*]], align 4 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = mul nsw <2 x i32> [[BROADCAST_SPLAT]], [[TMP4]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[DST:%.*]], i32 [[TMP2]] +; CHECK-NEXT: [[TMP6:%.*]] = mul nsw <2 x i32> [[BROADCAST_SPLAT]], [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[DST:%.*]], i32 [[TMP1]] ; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i32> [[VEC_IND2]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, i32* [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <2 x i32>* @@ -6391,27 +6391,27 @@ ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[STEP_ADD4:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND3:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], -; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32 -; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0 -; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 2 ; UNROLL-NO-IC-NEXT: [[STEP_ADD4]] = add <2 x i32> [[VEC_IND3]], -; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = trunc i64 [[INDEX]] to i32 -; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 0 -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i32 [[TMP2]], 1 -; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i32 [[TMP2]], 2 -; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i32 [[TMP2]], 3 -; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND3]], <2 x i32> 
-; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[VEC_IND3]], <2 x i32> [[STEP_ADD4]], <2 x i32> +; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = trunc i64 [[INDEX]] to i32 +; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 0 +; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], 1 +; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], 2 +; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i32 [[TMP0]], 3 +; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND3]], <2 x i32> +; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[VEC_IND3]], <2 x i32> [[STEP_ADD4]], <2 x i32> +; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32 +; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i32 [[OFFSET_IDX]], 0 +; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add i32 [[OFFSET_IDX]], 2 ; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = load i32, i32* [[SRC:%.*]], align 4 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP9]], i32 0 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = load i32, i32* [[SRC]], align 4 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <2 x i32> poison, i32 [[TMP10]], i32 0 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT7]], <2 x i32> poison, <2 x i32> zeroinitializer -; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = mul nsw <2 x i32> [[BROADCAST_SPLAT]], [[TMP7]] -; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = mul nsw <2 x i32> [[BROADCAST_SPLAT8]], [[TMP8]] -; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[DST:%.*]], i32 [[TMP3]] -; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = getelementptr i32, i32* [[DST]], i32 [[TMP5]] +; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = mul nsw <2 x i32> [[BROADCAST_SPLAT]], [[TMP5]] +; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = mul nsw <2 x i32> [[BROADCAST_SPLAT8]], [[TMP6]] +; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[DST:%.*]], i32 [[TMP1]] +; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = getelementptr i32, i32* [[DST]], i32 [[TMP3]] ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = add <2 x i32> [[VEC_IND3]], [[TMP11]] ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = add <2 x i32> [[STEP_ADD4]], [[TMP12]] ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = getelementptr i32, i32* [[TMP13]], i32 0 @@ -6585,12 +6585,12 @@ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND]], <2 x i32> -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 [[TMP21]] +; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND]], <2 x i32> +; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 [[TMP22]] ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i32 0 ; CHECK-NEXT: [[TMP25:%.*]] = bitcast i32* [[TMP24]] to <2 x i32>* -; CHECK-NEXT: store <2 x i32> [[TMP22]], <2 x i32>* [[TMP25]], align 4 +; CHECK-NEXT: store <2 x i32> [[TMP21]], <2 x i32>* [[TMP25]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add 
nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], [[DOTSPLAT3]] ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -6817,19 +6817,19 @@ ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 0 -; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = add i64 [[INDEX]], 2 ; UNROLL-NO-IC-NEXT: [[STEP_ADD]] = add <2 x i32> [[VEC_IND]], [[DOTSPLAT3]] -; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND]], <2 x i32> -; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = shufflevector <2 x i32> [[VEC_IND]], <2 x i32> [[STEP_ADD]], <2 x i32> -; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 [[TMP21]] -; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 [[TMP22]] +; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND]], <2 x i32> +; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = shufflevector <2 x i32> [[VEC_IND]], <2 x i32> [[STEP_ADD]], <2 x i32> +; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = add i64 [[INDEX]], 0 +; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = add i64 [[INDEX]], 2 +; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 [[TMP23]] +; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 [[TMP24]] ; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, i32* [[TMP25]], i32 0 ; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = bitcast i32* [[TMP27]] to <2 x i32>* -; UNROLL-NO-IC-NEXT: store <2 x i32> [[TMP23]], <2 x i32>* [[TMP28]], align 4 +; UNROLL-NO-IC-NEXT: store <2 x i32> [[TMP21]], <2 x i32>* [[TMP28]], align 4 ; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, i32* [[TMP25]], i32 2 ; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = bitcast i32* [[TMP29]] to <2 x i32>* -; UNROLL-NO-IC-NEXT: store <2 x i32> [[TMP24]], <2 x i32>* [[TMP30]], align 4 +; UNROLL-NO-IC-NEXT: store <2 x i32> [[TMP22]], <2 x i32>* [[TMP30]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], [[DOTSPLAT3]] ; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll --- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll @@ -190,10 +190,10 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32 -; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP14:%.*]] = xor i32 [[TMP13]], -1 +; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = xor i32 [[TMP12]], -1 ; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], [[N]] ; CHECK-NEXT: [[TMP16:%.*]] = sext i32 [[TMP15]] to i64 ; CHECK-NEXT: [[TMP17:%.*]] = 
getelementptr inbounds float, float* [[B:%.*]], i64 [[TMP16]]
@@ -203,7 +203,7 @@
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP20]], align 4
 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT: [[TMP21:%.*]] = fadd fast <4 x float> [[REVERSE]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
-; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[TMP12]]
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[TMP13]]
 ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, float* [[TMP22]], i32 0
 ; CHECK-NEXT: [[TMP24:%.*]] = bitcast float* [[TMP23]] to <4 x float>*
 ; CHECK-NEXT: store <4 x float> [[TMP21]], <4 x float>* [[TMP24]], align 4
@@ -226,10 +226,10 @@
 ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
 ; CHECK: vec.epilog.vector.body:
 ; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT5:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP26:%.*]] = add i64 [[INDEX4]], 0
 ; CHECK-NEXT: [[OFFSET_IDX9:%.*]] = trunc i64 [[INDEX4]] to i32
-; CHECK-NEXT: [[TMP27:%.*]] = add i32 [[OFFSET_IDX9]], 0
-; CHECK-NEXT: [[TMP28:%.*]] = xor i32 [[TMP27]], -1
+; CHECK-NEXT: [[TMP26:%.*]] = add i32 [[OFFSET_IDX9]], 0
+; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[INDEX4]], 0
+; CHECK-NEXT: [[TMP28:%.*]] = xor i32 [[TMP26]], -1
 ; CHECK-NEXT: [[TMP29:%.*]] = add i32 [[TMP28]], [[N]]
 ; CHECK-NEXT: [[TMP30:%.*]] = sext i32 [[TMP29]] to i64
 ; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP30]]
@@ -239,7 +239,7 @@
 ; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, <4 x float>* [[TMP34]], align 4
 ; CHECK-NEXT: [[REVERSE11:%.*]] = shufflevector <4 x float> [[WIDE_LOAD10]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT: [[TMP35:%.*]] = fadd fast <4 x float> [[REVERSE11]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
-; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP26]]
+; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP27]]
 ; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds float, float* [[TMP36]], i32 0
 ; CHECK-NEXT: [[TMP38:%.*]] = bitcast float* [[TMP37]] to <4 x float>*
 ; CHECK-NEXT: store <4 x float> [[TMP35]], <4 x float>* [[TMP38]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
--- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
@@ -145,14 +145,14 @@
 ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i8* [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8*, i8** [[START_1]], i64 [[TMP1]]
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, i8* [[POINTER_PHI]], <4 x i64> <i64 0, i64 1, i64 2, i64 3>
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, <4 x i8*> [[TMP2]], i64 1
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8*, i8** [[START_1]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[POINTER_PHI]], <4 x i64> <i64 0, i64 1, i64 2, i64 3>
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, <4 x i8*> [[TMP1]], i64 1
 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8*, i8** [[NEXT_GEP]], i32 0
 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to <4 x i8*>*
 ; CHECK-NEXT: store <4 x i8*> [[TMP3]], <4 x i8*>* [[TMP5]], align 8
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i8*> [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i8*> [[TMP1]], i32 0
 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[TMP6]], i32 0
 ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to <4 x i8>*
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP8]], align 1
diff --git a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll
--- a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll
@@ -30,29 +30,29 @@
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE12:%.*]] ]
-; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], [[INC]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0
 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[INC]], i32 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i64> <i64 0, i64 1>, [[DOTSPLAT]]
-; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i64> [[BROADCAST_SPLAT2]], [[TMP3]]
-; CHECK-NEXT: [[TMP4:%.*]] = mul i64 0, [[INC]]
-; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], [[TMP4]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT3]], <2 x i64> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[VEC_IV:%.*]] = add <2 x i64> [[BROADCAST_SPLAT4]], <i64 0, i64 1>
+; CHECK-NEXT: [[VEC_IV:%.*]] = add <2 x i64> [[BROADCAST_SPLAT2]], <i64 0, i64 1>
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], [[INC]]
+; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[OFFSET_IDX]] to i8
+; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[INC]] to i8
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x i8> poison, i8 [[TMP3]], i32 0
+; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x i8> [[BROADCAST_SPLATINSERT3]], <2 x i8> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i8> poison, i8 [[TMP4]], i32 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT]], <2 x i8> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP5:%.*]] = mul <2 x i8> <i8 0, i8 1>, [[DOTSPLAT]]
+; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i8> [[BROADCAST_SPLAT4]], [[TMP5]]
+; CHECK-NEXT: [[TMP6:%.*]] = mul i8 0, [[TMP4]]
+; CHECK-NEXT: [[TMP7:%.*]] = add i8 [[TMP3]], [[TMP6]]
 ; CHECK-NEXT: [[OFFSET_IDX5:%.*]] = mul i64 [[INDEX]], [[INC]]
-; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[OFFSET_IDX5]] to i8
-; CHECK-NEXT: [[TMP7:%.*]] = trunc i64 [[INC]] to i8
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <2 x i8> poison, i8 [[TMP6]], i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <2 x i8> [[BROADCAST_SPLATINSERT6]], <2 x i8> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <2 x i8> poison, i8 [[TMP7]], i32 0
[[DOTSPLAT9:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT8]], <2 x i8> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = mul <2 x i8> , [[DOTSPLAT9]] -; CHECK-NEXT: [[INDUCTION10:%.*]] = add <2 x i8> [[BROADCAST_SPLAT7]], [[TMP8]] -; CHECK-NEXT: [[TMP9:%.*]] = mul i8 0, [[TMP7]] -; CHECK-NEXT: [[TMP10:%.*]] = add i8 [[TMP6]], [[TMP9]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX5]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT6]], <2 x i64> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <2 x i64> poison, i64 [[INC]], i32 0 +; CHECK-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT8]], <2 x i64> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = mul <2 x i64> , [[DOTSPLAT9]] +; CHECK-NEXT: [[INDUCTION10:%.*]] = add <2 x i64> [[BROADCAST_SPLAT7]], [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = mul i64 0, [[INC]] +; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX5]], [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp ule <2 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP11]], i32 0 ; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] @@ -66,7 +66,7 @@ ; CHECK-NEXT: store i32 0, i32* [[PTR]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]] ; CHECK: pred.store.continue12: -; CHECK-NEXT: [[TMP14:%.*]] = add i8 [[TMP10]], 1 +; CHECK-NEXT: [[TMP14:%.*]] = add i8 [[TMP7]], 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll b/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll --- a/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll +++ b/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll @@ -17,14 +17,14 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE4:%.*]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[PRED_LOAD_CONTINUE4]] ] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[VEC_IV:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[INDEX]] to i16 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i16 41, [[TMP0]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[OFFSET_IDX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i16> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT1]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[VEC_IV:%.*]] = add <2 x i32> [[BROADCAST_SPLAT2]], +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i16> poison, i16 [[OFFSET_IDX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = 
shufflevector <2 x i16> [[BROADCAST_SPLATINSERT1]], <2 x i16> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i16> [[BROADCAST_SPLAT2]], ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <2 x i32> [[VEC_IV]], ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll --- a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll +++ b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll @@ -6,9 +6,9 @@ ; PR15882 ; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] -; CHECK: %offset.idx = sub i64 %startval, %index -; CHECK: %[[a0:.+]] = add i64 %offset.idx, 0 -; CHECK: %[[a4:.+]] = add i64 %offset.idx, -4 +; CHECK: %offset.idx3 = sub i64 %startval, %index +; CHECK: %[[a0:.+]] = add i64 %offset.idx3, 0 +; CHECK: %[[a4:.+]] = add i64 %offset.idx3, -4 define i32 @reverse_induction_i64(i64 %startval, i32 * %ptr) { entry: @@ -32,9 +32,9 @@ ; CHECK-LABEL: @reverse_induction_i128( ; CHECK: %index = phi i128 [ 0, %vector.ph ], [ %index.next, %vector.body ] -; CHECK: %offset.idx = sub i128 %startval, %index -; CHECK: %[[a0:.+]] = add i128 %offset.idx, 0 -; CHECK: %[[a4:.+]] = add i128 %offset.idx, -4 +; CHECK: %offset.idx3 = sub i128 %startval, %index +; CHECK: %[[a0:.+]] = add i128 %offset.idx3, 0 +; CHECK: %[[a4:.+]] = add i128 %offset.idx3, -4 define i32 @reverse_induction_i128(i128 %startval, i32 * %ptr) { entry: diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll --- a/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll @@ -13,18 +13,18 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = trunc i64 [[INDEX]] to i32 -; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32 -; CHECK-NEXT: [[TMP4:%.*]] = mul i32 [[TMP3]], [[X]] -; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* [[TMP6]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast float* [[TMP7]] to <4 x float>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP8]], align 4 -; CHECK-NEXT: [[TMP11:%.*]] = mul i32 [[TMP10]], [[X]] +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDEX]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], [[X]] +; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast float* [[TMP9]] to <4 x float>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* 
[[TMP10]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = mul i32 [[TMP2]], [[X]] ; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, float* [[TMP13]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/select-reduction.ll b/llvm/test/Transforms/LoopVectorize/select-reduction.ll --- a/llvm/test/Transforms/LoopVectorize/select-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/select-reduction.ll @@ -25,21 +25,21 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[EXTRA_ITER]], [[INDEX]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[OFFSET_IDX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT2]], -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i64> [[BROADCAST_SPLAT2]], +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[EXTRA_ITER]], [[INDEX]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i64> poison, i64 [[OFFSET_IDX]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT3]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i64> [[BROADCAST_SPLAT4]], +; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT4]], +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[VEC_PHI]], ; CHECK-NEXT: [[TMP3]] = select <4 x i1> [[TMP2]], <4 x i32> [[VEC_PHI]], <4 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP3]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP4]]) ; CHECK-NEXT: br i1 true, label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] @@ -54,7 +54,7 @@ ; CHECK-NEXT: [[SEL]] = select i1 [[SEL_COND]], i32 [[NEXT]], i32 10 ; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 0 -; CHECK-NEXT: br i1 [[EC]], label [[EXIT_LOOPEXIT]], label [[LOOP]], [[LOOP2:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[EC]], label [[EXIT_LOOPEXIT]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: exit.loopexit: ; CHECK-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], [[LOOP]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[EXIT]] diff --git a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll --- 
a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll +++ b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll @@ -35,7 +35,7 @@ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 32, 32 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -60,7 +60,7 @@ ; CHECK-NEXT: store i16 [[RES]], i16* [[DST_PTR]], align 2 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[CMP439:%.*]] = icmp ult i64 [[IV]], 31 -; CHECK-NEXT: br i1 [[CMP439]], label [[LOOP_HEADER]], label [[EXIT]], [[LOOP2:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[CMP439]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -130,7 +130,7 @@ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 32, 32 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -156,7 +156,7 @@ ; CHECK-NEXT: store i16 [[RES]], i16* [[DST_PTR]], align 2 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[CMP439:%.*]] = icmp ult i64 [[IV]], 31 -; CHECK-NEXT: br i1 [[CMP439]], label [[LOOP_HEADER]], label [[EXIT]], [[LOOP5:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[CMP439]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -220,7 +220,7 @@ ; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <2 x i16> [[VEC_IND1]], ; CHECK-NEXT: [[VEC_IND_NEXT4]] = add <2 x i16> [[VEC_IND3]], ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 32, 32 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -241,7 +241,7 @@ ; CHECK-NEXT: [[LV:%.*]] = load i16, i16* [[SRC_PTR]], align 1 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[CMP439:%.*]] = icmp ult i64 [[IV]], 31 -; CHECK-NEXT: br i1 [[CMP439]], label [[LOOP_HEADER]], label [[EXIT]], [[LOOP7:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[CMP439]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -322,7 +322,7 @@ ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <2 x i16> [[VEC_IND1]], ; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 -; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; 
CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, 64 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -348,7 +348,7 @@ ; CHECK-NEXT: store i16 [[RES]], i16* [[DST_PTR]], align 2 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[CMP439:%.*]] = icmp ult i64 [[IV]], 63 -; CHECK-NEXT: br i1 [[CMP439]], label [[LOOP_HEADER]], label [[EXIT]], [[LOOP9:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[CMP439]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -404,7 +404,7 @@ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP10:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 1000, 1000 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -421,7 +421,7 @@ ; CHECK-NEXT: store i32 [[P]], i32* [[GEP_PTR]], align 4 ; CHECK-NEXT: [[ADD_I]] = add nsw i32 [[P]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[ADD_I]], 1000 -; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_HEADER]], label [[EXIT]], [[LOOP11:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll b/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll --- a/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll +++ b/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll @@ -26,10 +26,10 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[PTR]], i32 [[TMP0]] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[SIZE]], [[INDEX]] -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[PTR]], i32 [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <4 x i8>* @@ -39,7 +39,7 @@ ; CHECK-NEXT: store <4 x i8> [[WIDE_LOAD]], <4 x i8>* [[TMP6]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[SIZE]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]] @@ -55,7 +55,7 @@ ; CHECK-NEXT: [[TMP8:%.*]] = load i8, i8* [[INCDEC_PTR]], align 1 ; CHECK-NEXT: store i8 [[TMP8]], i8* [[BUFF]], align 1 ; CHECK-NEXT: [[TOBOOL11:%.*]] = icmp eq i32 [[DEC]], 0 -; CHECK-NEXT: br i1 [[TOBOOL11]], label [[END]], label [[BODY]], !llvm.loop !2 +; CHECK-NEXT: br i1 [[TOBOOL11]], 
label [[END]], label [[BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: end: ; CHECK-NEXT: [[INCDEC_PTR_LCSSA:%.*]] = phi i8* [ [[INCDEC_PTR]], [[BODY]] ], [ [[IND_END2]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: store i8* [[INCDEC_PTR_LCSSA]], i8** [[POS]], align 4 @@ -94,10 +94,10 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[PTR]], i32 [[TMP0]] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[SIZE]], [[INDEX]] -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[PTR]], i32 [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <4 x i8>* @@ -107,7 +107,7 @@ ; CHECK-NEXT: store <4 x i8> [[WIDE_LOAD]], <4 x i8>* [[TMP6]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !4 +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[SIZE]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]] @@ -123,7 +123,7 @@ ; CHECK-NEXT: [[TMP8:%.*]] = load i8, i8* [[INCDEC_PTR]], align 1 ; CHECK-NEXT: store i8 [[TMP8]], i8* [[BUFF]], align 1 ; CHECK-NEXT: [[TOBOOL11:%.*]] = icmp eq i32 [[DEC]], 0 -; CHECK-NEXT: br i1 [[TOBOOL11]], label [[END]], label [[BODY]], !llvm.loop !5 +; CHECK-NEXT: br i1 [[TOBOOL11]], label [[END]], label [[BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: end: ; CHECK-NEXT: [[INCDEC_PTR_LCSSA:%.*]] = phi i8* [ [[INCDEC_PTR]], [[BODY]] ], [ [[IND_END2]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: store i8* [[INCDEC_PTR_LCSSA]], i8** [[POS]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll @@ -12,7 +12,7 @@ ; CHECK-NEXT: label="\ vector loop" ; CHECK-NEXT: N1 [label = ; CHECK-NEXT: "for.body:\l" + -; CHECK-NEXT: " WIDEN-INDUCTION %iv = phi %iv.next, 0\l" + +; CHECK-NEXT: " SCALAR-STEPS %iv = phi %iv.next, 0\l" + ; CHECK-NEXT: " CLONE ir\<%arrayidx\> = getelementptr ir\<%y\>, ir\<%iv\>\l" + ; CHECK-NEXT: " WIDEN ir\<%lv\> = load ir\<%arrayidx\>\l" + ; CHECK-NEXT: " WIDEN-CALL ir\<%call\> = call @llvm.sqrt.f32(ir\<%lv\>)\l" + diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -11,7 +11,7 @@ ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: for.body: -; CHECK-NEXT: WIDEN-INDUCTION %iv = phi %iv.next, 0 +; CHECK-NEXT: SCALAR-STEPS %iv = phi %iv.next, 0 ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr ir<%y>, ir<%iv> ; CHECK-NEXT: WIDEN ir<%lv> = load ir<%arrayidx> ; CHECK-NEXT: WIDEN-CALL ir<%call> = 
call @llvm.sqrt.f32(ir<%lv>) @@ -85,8 +85,8 @@ ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: for.body: -; CHECK-NEXT: WIDEN-INDUCTION %iv = phi %iv.next, 0 ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi ir<0.000000e+00>, ir<%red.next> +; CHECK-NEXT: SCALAR-STEPS %iv = phi %iv.next, 0 ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr ir<%y>, ir<%iv> ; CHECK-NEXT: WIDEN ir<%lv> = load ir<%arrayidx> ; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + fast reduce.fadd (ir<%lv>) @@ -185,7 +185,7 @@ ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: for.body: -; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next +; CHECK-NEXT: SCALAR-STEPS %iv = phi 0, %iv.next ; CHECK-NEXT: CLONE ir<%gep.AB.0> = getelementptr ir<@AB>, ir<0>, ir<%iv> ; CHECK-NEXT: INTERLEAVE-GROUP with factor 4 at %AB.0, ir<%gep.AB.0> ; CHECK-NEXT: ir<%AB.0> = load from index 0 @@ -247,8 +247,8 @@ ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: for.body: -; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%sum.07> = phi ir<0.000000e+00>, ir<%muladd> +; CHECK-NEXT: SCALAR-STEPS %iv = phi 0, %iv.next ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr ir<%a>, ir<%iv> ; CHECK-NEXT: WIDEN ir<%l.a> = load ir<%arrayidx> ; CHECK-NEXT: CLONE ir<%arrayidx2> = getelementptr ir<%b>, ir<%iv> @@ -282,7 +282,7 @@ ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: loop: -; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next +; CHECK-NEXT: SCALAR-STEPS %iv = phi 0, %iv.next ; CHECK-NEXT: CLONE ir<%isd> = getelementptr ir<%asd>, ir<%iv> ; CHECK-NEXT: WIDEN ir<%lsd> = load ir<%isd> ; CHECK-NEXT: WIDEN ir<%psd> = add ir<%lsd>, ir<23> diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -872,7 +872,7 @@ ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: loop.header: -; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next +; CHECK-NEXT: SCALAR-STEPS %iv = phi 0, %iv.next ; CHECK-NEXT: Successor(s): loop.then ; CHECK-EMPTY: ; CHECK-NEXT: loop.then: