diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2484,7 +2484,11 @@
     return B.CreateMul(X, Y);
   };
 
-  switch (ID.getKind()) {
+  auto Kind = ID.getKind();
+  if (Kind == InductionDescriptor::IK_PtrInduction &&
+      StartValue->getType()->isIntegerTy())
+    Kind = InductionDescriptor::IK_IntInduction;
+  switch (Kind) {
   case InductionDescriptor::IK_IntInduction: {
     assert(!isa<VectorType>(Index->getType()) &&
            "Vector indices not supported for integer inductions yet");
@@ -8232,13 +8236,7 @@
     VPValue *Step = vputils::getOrCreateVPValueForSCEVExpr(Plan, II->getStep(),
                                                            *PSE.getSE());
     assert(isa<SCEVConstant>(II->getStep()));
-    return new VPWidenPointerInductionRecipe(
-        Phi, Operands[0], Step, *II,
-        LoopVectorizationPlanner::getDecisionAndClampRange(
-            [&](ElementCount VF) {
-              return CM.isScalarAfterVectorization(Phi, VF);
-            },
-            Range));
+    return new VPWidenPointerInductionRecipe(Phi, Operands[0], Step, *II);
   }
   return nullptr;
 }
@@ -9158,7 +9156,9 @@
   // in ways that accessing values using original IR values is incorrect.
   Plan->disableValue2VPValue();
 
-  VPlanTransforms::optimizeInductions(*Plan, *PSE.getSE());
+  VPlanTransforms::removeDeadRecipes(*Plan);
+  VPlanTransforms::optimizeInductions(*Plan, *PSE.getSE(),
+                                      OrigLoop->getLoopLatch());
   VPlanTransforms::removeDeadRecipes(*Plan);
 
   bool ShouldSimplify = true;
@@ -9459,37 +9459,6 @@
   auto *IVR = getParent()->getPlan()->getCanonicalIV();
   PHINode *CanonicalIV = cast<PHINode>(State.get(IVR, 0));
 
-  if (onlyScalarsGenerated(State.VF)) {
-    // This is the normalized GEP that starts counting at zero.
-    Value *PtrInd = State.Builder.CreateSExtOrTrunc(
-        CanonicalIV, IndDesc.getStep()->getType());
-    // Determine the number of scalars we need to generate for each unroll
-    // iteration. If the instruction is uniform, we only need to generate the
-    // first lane. Otherwise, we generate all VF values.
-    bool IsUniform = vputils::onlyFirstLaneUsed(this);
-    assert((IsUniform || !State.VF.isScalable()) &&
-           "Cannot scalarize a scalable VF");
-    unsigned Lanes = IsUniform ? 1 : State.VF.getFixedValue();
-
-    for (unsigned Part = 0; Part < State.UF; ++Part) {
-      Value *PartStart =
-          createStepForVF(State.Builder, PtrInd->getType(), State.VF, Part);
-
-      for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
-        Value *Idx = State.Builder.CreateAdd(
-            PartStart, ConstantInt::get(PtrInd->getType(), Lane));
-        Value *GlobalIdx = State.Builder.CreateAdd(PtrInd, Idx);
-
-        Value *Step = State.get(getOperand(1), VPIteration(0, Part));
-        Value *SclrGep = emitTransformedIndex(
-            State.Builder, GlobalIdx, IndDesc.getStartValue(), Step, IndDesc);
-        SclrGep->setName("next.gep");
-        State.set(this, SclrGep, VPIteration(Part, Lane));
-      }
-    }
-    return;
-  }
-
   assert(isa<SCEVConstant>(IndDesc.getStep()) &&
          "Induction step not a SCEV constant!");
   Type *PhiType = IndDesc.getStep()->getType();
@@ -9569,7 +9538,8 @@
   }
 
   assert(DerivedIV != CanonicalIV && "IV didn't need transforming?");
-  State.set(this, DerivedIV, VPIteration(0, 0));
+  for (unsigned Part = 0; Part != State.UF; ++Part)
+    State.set(this, DerivedIV, VPIteration(Part, 0));
 }
 
 void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1182,18 +1182,14 @@
 class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe {
   const InductionDescriptor &IndDesc;
 
-  bool IsScalarAfterVectorization;
-
 public:
   /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
   /// Start.
   VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step,
-                                const InductionDescriptor &IndDesc,
-                                bool IsScalarAfterVectorization)
+                                const InductionDescriptor &IndDesc)
       : VPHeaderPHIRecipe(VPVWidenPointerInductionSC,
                           VPWidenPointerInductionSC, Phi),
-        IndDesc(IndDesc),
-        IsScalarAfterVectorization(IsScalarAfterVectorization) {
+        IndDesc(IndDesc) {
     addOperand(Start);
     addOperand(Step);
   }
@@ -1206,14 +1202,14 @@
   /// Generate vector values for the pointer induction.
   void execute(VPTransformState &State) override;
 
-  /// Returns true if only scalar values will be generated.
-  bool onlyScalarsGenerated(ElementCount VF);
-
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   /// Print the recipe.
   void print(raw_ostream &O, const Twine &Indent,
              VPSlotTracker &SlotTracker) const override;
 #endif
+
+  /// Returns the induction descriptor for the recipe.
+  const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
 };
 
 /// A recipe for handling header phis that are widened in the vector loop.
@@ -1460,6 +1456,12 @@
            "Op must be an operand of the recipe");
     return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
   }
+
+  bool usesScalars(const VPValue *Op) const override {
+    assert(is_contained(operands(), Op) &&
+           "Op must be an operand of the recipe");
+    return Op == getAddr();
+  }
 };
 
 /// A recipe to represent inloop reduction operations, performing a reduction on
@@ -1836,7 +1838,8 @@
 
   /// Check if the induction described by \p ID is canonical, i.e. has the same
   /// start, step (of 1), and type as the canonical IV.
-  bool isCanonical(const InductionDescriptor &ID, Type *Ty) const;
+  bool isCanonical(Value *StartV, const InductionDescriptor &ID,
+                   Type *Ty) const;
 };
 
 /// A recipe for generating the active lane mask for the vector loop that is
@@ -2599,6 +2602,10 @@
   bool hasScalarVFOnly() const { return VFs.size() == 1 && VFs[0].isScalar(); }
 
+  bool hasScalableVFs() const {
+    return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
+  }
+
   const std::string &getName() const { return Name; }
   void setName(const Twine &newName) { Name = newName.str(); }
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -655,7 +655,7 @@
   assert(all_of(IV->users(),
                 [](const VPUser *U) {
                   if (isa<VPScalarIVStepsRecipe>(U) ||
-                      isa<VPDerivedIVRecipe>(U))
+                      isa<VPDerivedIVRecipe>(U) || isa<VPReplicateRecipe>(U))
                     return true;
                   auto *VPI = cast<VPInstruction>(U);
                   return VPI->getOpcode() ==
@@ -706,11 +706,6 @@
       Phi = cast<PHINode>(State->get(R.getVPSingleValue(), 0));
     } else {
       auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
-      // TODO: Split off the case that all users of a pointer phi are scalar
-      // from the VPWidenPointerInductionRecipe.
-      if (WidenPhi->onlyScalarsGenerated(State->VF))
-        continue;
-
       auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi, 0));
       Phi = cast<PHINode>(GEP->getPointerOperand());
     }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1059,25 +1059,20 @@
 }
 #endif
 
-bool VPCanonicalIVPHIRecipe::isCanonical(const InductionDescriptor &ID,
+bool VPCanonicalIVPHIRecipe::isCanonical(Value *StartV,
+                                         const InductionDescriptor &ID,
                                          Type *Ty) const {
   if (Ty != getScalarType())
     return false;
   // The start value of ID must match the start value of this canonical
   // induction.
-  if (getStartValue()->getLiveInIRValue() != ID.getStartValue())
+  if (getStartValue()->getLiveInIRValue() != StartV)
     return false;
 
   ConstantInt *Step = ID.getConstIntStepValue();
   // ID must also be incremented by one. IK_IntInduction always increments the
   // induction by Step, but the binary op may not be set.
-  return ID.getKind() == InductionDescriptor::IK_IntInduction && Step &&
-         Step->isOne();
-}
-
-bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(ElementCount VF) {
-  return IsScalarAfterVectorization &&
-         (!VF.isScalable() || vputils::onlyFirstLaneUsed(this));
+  return Step && Step->isOne();
 }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -59,7 +59,8 @@
   /// provide them by building scalar steps off of the canonical scalar IV and
   /// update the original IV's users. This is an optional optimization to reduce
   /// the needs of vector extracts.
-  static void optimizeInductions(VPlan &Plan, ScalarEvolution &SE);
+  static void optimizeInductions(VPlan &Plan, ScalarEvolution &SE,
+                                 BasicBlock *OrigLoopLatch);
 
   /// Remove redundant ExpandSCEVRecipes in \p Plan's entry block by replacing
   /// them with already existing recipes expanding the same SCEV expression.
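Context for the VPlanTransforms.cpp change below: when a widened pointer induction has users that need scalar values, optimizeInductions() now rebuilds the address as a scalar GEP off the canonical induction variable (via createScalarIVSteps plus a VPReplicateRecipe that clones the loop's own GEP), instead of expanding the pointer phi per part and lane at execute() time. A minimal before/after sketch in LLVM IR, assuming an i8 pointer induction with unit step (the value names are illustrative, not taken from the patch):

    ; before: each lane's pointer comes from the widened pointer phi
    %pointer.phi = phi ptr [ %start, %vector.ph ], [ %ptr.ind, %vector.body ]
    ; after: one GEP per scalar use, indexed by the canonical IV
    %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
    %next.gep = getelementptr inbounds i8, ptr %start, i64 %index

This is the pattern the updated CHECK lines in the tests below expect, e.g. in sve-epilog-vect.ll.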
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -400,9 +400,11 @@
   }
 }
 
-static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID,
-                                    ScalarEvolution &SE, Instruction *TruncI,
-                                    Type *IVTy, VPValue *StartV) {
+static VPScalarIVStepsRecipe *createScalarIVSteps(VPlan &Plan,
+                                                  const InductionDescriptor &ID,
+                                                  ScalarEvolution &SE,
+                                                  Instruction *TruncI,
+                                                  Type *IVTy, VPValue *StartV) {
   VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
   auto IP = HeaderVPBB->getFirstNonPhi();
   VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
@@ -410,7 +412,7 @@
       vputils::getOrCreateVPValueForSCEVExpr(Plan, ID.getStep(), SE);
   Type *TruncTy = TruncI ? TruncI->getType() : IVTy;
   VPValue *BaseIV = CanonicalIV;
-  if (!CanonicalIV->isCanonical(ID, TruncTy)) {
+  if (!CanonicalIV->isCanonical(StartV->getLiveInIRValue(), ID, TruncTy)) {
     BaseIV = new VPDerivedIVRecipe(ID, StartV, CanonicalIV, Step, TruncTy);
     HeaderVPBB->insert(BaseIV->getDefiningRecipe(), IP);
 
@@ -421,11 +423,65 @@
   return Steps;
 }
 
-void VPlanTransforms::optimizeInductions(VPlan &Plan, ScalarEvolution &SE) {
+void VPlanTransforms::optimizeInductions(VPlan &Plan, ScalarEvolution &SE,
+                                         BasicBlock *OrigLoopLatch) {
   SmallVector<VPRecipeBase *> ToRemove;
   VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
   bool HasOnlyVectorVFs = !Plan.hasVF(ElementCount::getFixed(1));
+  bool HasScalableVFs = Plan.hasScalableVFs();
   for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
+    if (auto *PtrIV = dyn_cast<VPWidenPointerInductionRecipe>(&Phi)) {
+      if (all_of(PtrIV->users(),
+                 [PtrIV](VPUser *U) { return !U->usesScalars(PtrIV); }))
+        continue;
+
+      bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(PtrIV);
+      if (HasScalableVFs && !OnlyFirstLaneUsed)
+        continue;
+
+      const InductionDescriptor &ID = PtrIV->getInductionDescriptor();
+      VPValue *StartV = Plan.getOrAddExternalDef(
+          ConstantInt::get(ID.getStep()->getType(), 0));
+      VPRecipeBase *Steps = createScalarIVSteps(
+          Plan, ID, SE, nullptr, ID.getStep()->getType(), StartV);
+
+      bool IsUniform = false;
+      if (OnlyFirstLaneUsed) {
+        Steps = cast<VPRecipeBase>(Steps->getOperand(0)->getDefiningRecipe());
+        IsUniform = true;
+      }
+
+      SmallVector<VPValue *> Ops = {PtrIV->getOperand(0),
+                                    Steps->getVPSingleValue()};
+      auto *P = cast<PHINode>(PtrIV->getUnderlyingInstr());
+      GetElementPtrInst *GEP = nullptr;
+      do {
+        Value *V = P->getIncomingValueForBlock(OrigLoopLatch);
+        if (auto *G = dyn_cast<GetElementPtrInst>(V)) {
+          GEP = G;
+          break;
+        }
+        P = cast<PHINode>(V);
+      } while (true);
+
+      auto *Recipe = new VPReplicateRecipe(
+          GEP, nullptr, make_range(Ops.begin(), Ops.end()), IsUniform, false);
+
+      if (isa<VPHeaderPHIRecipe>(Steps))
+        Recipe->insertBefore(&*HeaderVPBB->getFirstNonPhi());
+      else
+        Recipe->insertAfter(Steps);
+      PtrIV->replaceAllUsesWith(Recipe);
+
+      continue;
+    }
+    auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
+    if (!IV)
+      continue;
+    if (HasOnlyVectorVFs &&
+        none_of(IV->users(), [IV](VPUser *U) { return U->usesScalars(IV); }))
+      continue;
+
     auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
     if (!WideIV)
       continue;
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
@@ -5,7 +5,7 @@
 
 ; Test for #57712.
 define void @test_widen_ptr_induction(ptr %ptr.start.1) {
-; CHECK-LABEL: @test_widen_ptr_induction(
+; CHECK-LABEL: define {{[^@]+}}@test_widen_ptr_induction(
 ; CHECK-NEXT:  iter.check:
 ; CHECK-NEXT:    br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
 ; CHECK:       vector.main.loop.iter.check:
@@ -15,72 +15,72 @@
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR_START_1:%.*]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT:    [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP1]]
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x ptr> [[TMP2]], ptr [[NEXT_GEP1]], i32 1
-; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 2
-; CHECK-NEXT:    [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP4]]
-; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 3
-; CHECK-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP5]]
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP2]], i32 0
-; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x ptr> [[TMP6]], ptr [[NEXT_GEP3]], i32 1
-; CHECK-NEXT:    [[TMP8:%.*]] = icmp ne <2 x ptr> [[TMP3]], zeroinitializer
-; CHECK-NEXT:    [[TMP9:%.*]] = icmp ne <2 x ptr> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
-; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP10]])
-; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1
-; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP11]])
-; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x i1> [[TMP9]], i32 0
-; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP12]])
-; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x i1> [[TMP9]], i32 1
-; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP13]])
-; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
-; CHECK-NEXT:    store <2 x i8> zeroinitializer, ptr [[TMP14]], align 1
-; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 2
-; CHECK-NEXT:    store <2 x i8> zeroinitializer, ptr [[TMP15]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[PTR_START_1:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[PTR_START_1]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP4]], i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x ptr> [[TMP6]], ptr [[TMP5]], i32 1
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[PTR_START_1]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[PTR_START_1]], i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP8]], i32 0
+; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <2 x ptr> [[TMP10]], ptr [[TMP9]], i32 1
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp ne <2 x ptr> [[TMP7]], zeroinitializer
+; CHECK-NEXT:    [[TMP13:%.*]] = icmp ne <2 x ptr> [[TMP11]], zeroinitializer
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x i1> [[TMP12]], i32 0
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP14]])
+; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <2 x i1> [[TMP12]], i32 1
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP15]])
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x i1> [[TMP13]], i32 0
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP16]])
+; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <2 x i1> [[TMP13]], i32 1
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP17]])
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
+; CHECK-NEXT:    store <2 x i8> zeroinitializer, ptr [[TMP18]], align 1
+; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 2
+; CHECK-NEXT:    store <2 x i8> zeroinitializer, ptr [[TMP19]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
-; CHECK-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
+; CHECK-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
+; CHECK-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 10001, 10000
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
 ; CHECK:       vec.epilog.iter.check:
-; CHECK-NEXT:    [[IND_END4:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 10000
+; CHECK-NEXT:    [[IND_END1:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 10000
 ; CHECK-NEXT:    br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
 ; CHECK:       vec.epilog.ph:
 ; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 10000
 ; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
 ; CHECK:       vec.epilog.vector.body:
-; CHECK-NEXT:    [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT10:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP17:%.*]] = add i64 [[INDEX7]], 0
-; CHECK-NEXT:    [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP17]]
-; CHECK-NEXT:    [[TMP18:%.*]] = add i64 [[INDEX7]], 1
-; CHECK-NEXT:    [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP18]]
-; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP8]], i32 0
-; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <2 x ptr> [[TMP19]], ptr [[NEXT_GEP9]], i32 1
-; CHECK-NEXT:    [[TMP21:%.*]] = icmp ne <2 x ptr> [[TMP20]], zeroinitializer
-; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <2 x i1> [[TMP21]], i32 0
-; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP22]])
-; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <2 x i1> [[TMP21]], i32 1
-; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP23]])
-; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr i8, ptr [[NEXT_GEP8]], i32 0
-; CHECK-NEXT:    store <2 x i8> zeroinitializer, ptr [[TMP24]], align 1
-; CHECK-NEXT:    [[INDEX_NEXT10]] = add nuw i64 [[INDEX7]], 2
-; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT10]], 10000
-; CHECK-NEXT:    br i1 [[TMP25]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
+; CHECK-NEXT:    [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT5:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP21:%.*]] = add i64 [[INDEX4]], 0
+; CHECK-NEXT:    [[TMP22:%.*]] = add i64 [[INDEX4]], 1
+; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[PTR_START_1]], i64 [[TMP21]]
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[PTR_START_1]], i64 [[TMP22]]
+; CHECK-NEXT:    [[TMP25:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP23]], i32 0
+; CHECK-NEXT:    [[TMP26:%.*]] = insertelement <2 x ptr> [[TMP25]], ptr [[TMP24]], i32 1
+; CHECK-NEXT:    [[TMP27:%.*]] = icmp ne <2 x ptr> [[TMP26]], zeroinitializer
+; CHECK-NEXT:    [[TMP28:%.*]] = extractelement <2 x i1> [[TMP27]], i32 0
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP28]])
+; CHECK-NEXT:    [[TMP29:%.*]] = extractelement <2 x i1> [[TMP27]], i32 1
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP29]])
+; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[TMP23]], i32 0
+; CHECK-NEXT:    store <2 x i8> zeroinitializer, ptr [[TMP30]], align 1
+; CHECK-NEXT:    [[INDEX_NEXT5]] = add nuw i64 [[INDEX4]], 2
+; CHECK-NEXT:    [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT5]], 10000
+; CHECK-NEXT:    br i1 [[TMP31]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
 ; CHECK:       vec.epilog.middle.block:
-; CHECK-NEXT:    [[CMP_N6:%.*]] = icmp eq i64 10001, 10000
-; CHECK-NEXT:    br i1 [[CMP_N6]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
+; CHECK-NEXT:    [[CMP_N3:%.*]] = icmp eq i64 10001, 10000
+; CHECK-NEXT:    br i1 [[CMP_N3]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
 ; CHECK:       vec.epilog.scalar.ph:
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 10000, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
-; CHECK-NEXT:    [[BC_RESUME_VAL5:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR_START_1]], [[ITER_CHECK]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END1]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR_START_1]], [[ITER_CHECK]] ]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL5]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL2]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    [[CMP_I_I_I_I:%.*]] = icmp ne ptr [[PTR_IV]], null
 ; CHECK-NEXT:    tail call void @llvm.assume(i1 [[CMP_I_I_I_I]])
 ; CHECK-NEXT:    store i8 0, ptr [[PTR_IV]], align 1
@@ -112,7 +112,7 @@
 declare void @llvm.assume(i1 noundef)
 
 define void @test_widen_induction(ptr %A, i64 %N) {
-; CHECK-LABEL: @test_widen_induction(
+; CHECK-LABEL: define {{[^@]+}}@test_widen_induction(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -168,7 +168,7 @@
 }
 
 define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) {
-; CHECK-LABEL: @test_widen_induction_variable_start(
+; CHECK-LABEL: define {{[^@]+}}@test_widen_induction_variable_start(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 [[N:%.*]], [[START:%.*]]
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -230,7 +230,7 @@
 }
 
 define void @test_widen_induction_step_2(ptr %A, i64 %N, i32 %step) {
-; CHECK-LABEL: @test_widen_induction_step_2(
+; CHECK-LABEL: define {{[^@]+}}@test_widen_induction_step_2(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -295,7 +295,7 @@
 }
 
 define void @test_widen_extended_induction(ptr %dst) {
-; CHECK-LABEL: @test_widen_extended_induction(
+; CHECK-LABEL: define {{[^@]+}}@test_widen_extended_induction(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
 ; CHECK:       vector.scevcheck:
@@ -357,7 +357,7 @@
 }
 
 define void @test_widen_truncated_induction(ptr %A) {
-; CHECK-LABEL: @test_widen_truncated_induction(
+; CHECK-LABEL: define {{[^@]+}}@test_widen_truncated_induction(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll
@@ -36,18 +36,18 @@
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i16, i16* [[PSRC]], i64 [[INDEX]]
-; CHECK-NEXT:    [[NEXT_GEP6:%.*]] = getelementptr i16, i16* [[PDST]], i64 [[INDEX]]
+; CHECK-NEXT:    [[NEXT_GEP6:%.*]] = getelementptr inbounds i16, i16* [[PDST]], i64 [[INDEX]]
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr inbounds i16, i16* [[PSRC]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16* [[NEXT_GEP]] to <8 x i16>*
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i16>, <8 x i16>* [[TMP3]], align 2
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i16, i16* [[NEXT_GEP]], i64 8
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[NEXT_GEP]], i64 8
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <8 x i16>*
 ; CHECK-NEXT:    [[WIDE_LOAD8:%.*]] = load <8 x i16>, <8 x i16>* [[TMP5]], align 2
 ; CHECK-NEXT:    [[TMP6:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[WIDE_LOAD]], <8 x i16> [[BROADCAST_SPLAT]])
 ; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[WIDE_LOAD8]], <8 x i16> [[BROADCAST_SPLAT10]])
 ; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[NEXT_GEP6]] to <8 x i16>*
 ; CHECK-NEXT:    store <8 x i16> [[TMP6]], <8 x i16>* [[TMP8]], align 2
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i16, i16* [[NEXT_GEP6]], i64 8
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[NEXT_GEP6]], i64 8
 ; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <8 x i16>*
 ; CHECK-NEXT:    store <8 x i16> [[TMP7]], <8 x i16>* [[TMP10]], align 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
@@ -127,18 +127,18 @@
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[PSRC:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr i8, i8* [[PDST:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr inbounds i8, i8* [[PDST:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr inbounds i8, i8* [[PSRC:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[NEXT_GEP]] to <16 x i8>*
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, <16 x i8>* [[TMP3]], align 2
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, i8* [[NEXT_GEP]], i64 16
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP]], i64 16
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <16 x i8>*
 ; CHECK-NEXT:    [[WIDE_LOAD5:%.*]] = load <16 x i8>, <16 x i8>* [[TMP5]], align 2
 ; CHECK-NEXT:    [[TMP6:%.*]] = call <16 x i8> @llvm.umin.v16i8(<16 x i8> [[WIDE_LOAD]], <16 x i8> [[BROADCAST_SPLAT]])
 ; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.umin.v16i8(<16 x i8> [[WIDE_LOAD5]], <16 x i8> [[BROADCAST_SPLAT7]])
 ; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i8* [[NEXT_GEP3]] to <16 x i8>*
 ; CHECK-NEXT:    store <16 x i8> [[TMP6]], <16 x i8>* [[TMP8]], align 2
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i8, i8* [[NEXT_GEP3]], i64 16
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP3]], i64 16
 ; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i8* [[TMP9]] to <16 x i8>*
 ; CHECK-NEXT:    store <16 x i8> [[TMP7]], <16 x i8>* [[TMP10]], align 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
@@ -167,8 +167,8 @@
 ; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
 ; CHECK:       vec.epilog.vector.body:
 ; CHECK-NEXT:    [[INDEX19:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT25:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT:    [[NEXT_GEP20:%.*]] = getelementptr i8, i8* [[PSRC]], i64 [[INDEX19]]
-; CHECK-NEXT:    [[NEXT_GEP21:%.*]] = getelementptr i8, i8* [[PDST]], i64 [[INDEX19]]
+; CHECK-NEXT:    [[NEXT_GEP21:%.*]] = getelementptr inbounds i8, i8* [[PDST]], i64 [[INDEX19]]
+; CHECK-NEXT:    [[NEXT_GEP20:%.*]] = getelementptr inbounds i8, i8* [[PSRC]], i64 [[INDEX19]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i8* [[NEXT_GEP20]] to <8 x i8>*
 ; CHECK-NEXT:    [[WIDE_LOAD22:%.*]] = load <8 x i8>, <8 x i8>* [[TMP12]], align 2
 ; CHECK-NEXT:    [[TMP13:%.*]] = call <8 x i8> @llvm.umin.v8i8(<8 x i8> [[WIDE_LOAD22]], <8 x i8> [[BROADCAST_SPLAT24]])
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
@@ -378,18 +378,12 @@
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[TMP6]]
-; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 16
-; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[TMP8]], 0
-; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], [[TMP9]]
-; CHECK-NEXT:    [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP10]]
-; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr inbounds i8, ptr [[START:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i32 0
 ; CHECK-NEXT:    store <vscale x 16 x i8> zeroinitializer, ptr [[TMP11]], align 1
 ; CHECK-NEXT:    [[TMP12:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    [[TMP13:%.*]] = mul i32 [[TMP12]], 16
-; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 [[TMP13]]
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i32 [[TMP13]]
 ; CHECK-NEXT:    store <vscale x 16 x i8> zeroinitializer, ptr [[TMP14]], align 1
 ; CHECK-NEXT:    [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP16:%.*]] = mul i64 [[TMP15]], 32
@@ -416,9 +410,8 @@
 ; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
 ; CHECK:       vec.epilog.vector.body:
 ; CHECK-NEXT:    [[INDEX8:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT10:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP22:%.*]] = add i64 [[INDEX8]], 0
-; CHECK-NEXT:    [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP22]]
-; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr i8, ptr [[NEXT_GEP9]], i32 0
+; CHECK-NEXT:    [[NEXT_GEP9:%.*]] = getelementptr inbounds i8, ptr [[START]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP9]], i32 0
 ; CHECK-NEXT:    store <vscale x 8 x i8> zeroinitializer, ptr [[TMP23]], align 1
 ; CHECK-NEXT:    [[TMP24:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP25:%.*]] = mul i64 [[TMP24]], 8
@@ -459,18 +452,12 @@
 ; CHECK-VF8-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK-VF8:       vector.body:
 ; CHECK-VF8-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF8-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
-; CHECK-VF8-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[TMP4]]
-; CHECK-VF8-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 16
-; CHECK-VF8-NEXT:    [[TMP7:%.*]] = add i64 [[TMP6]], 0
-; CHECK-VF8-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], [[TMP7]]
-; CHECK-VF8-NEXT:    [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP8]]
-; CHECK-VF8-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
+; CHECK-VF8-NEXT:    [[NEXT_GEP:%.*]] = getelementptr inbounds i8, ptr [[START:%.*]], i64 [[INDEX]]
+; CHECK-VF8-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i32 0
 ; CHECK-VF8-NEXT:    store <vscale x 16 x i8> zeroinitializer, ptr [[TMP9]], align 1
 ; CHECK-VF8-NEXT:    [[TMP10:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-VF8-NEXT:    [[TMP11:%.*]] = mul i32 [[TMP10]], 16
-; CHECK-VF8-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 [[TMP11]]
+; CHECK-VF8-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i32 [[TMP11]]
 ; CHECK-VF8-NEXT:    store <vscale x 16 x i8> zeroinitializer, ptr [[TMP12]], align 1
 ; CHECK-VF8-NEXT:    [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-VF8-NEXT:    [[TMP14:%.*]] = mul i64 [[TMP13]], 32
@@ -491,9 +478,8 @@
 ; CHECK-VF8-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
 ; CHECK-VF8:       vec.epilog.vector.body:
 ; CHECK-VF8-NEXT:    [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT7:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-VF8-NEXT:    [[TMP16:%.*]] = add i64 [[INDEX5]], 0
-; CHECK-VF8-NEXT:    [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP16]]
-; CHECK-VF8-NEXT:    [[TMP17:%.*]] = getelementptr i8, ptr [[NEXT_GEP6]], i32 0
+; CHECK-VF8-NEXT:    [[NEXT_GEP6:%.*]] = getelementptr inbounds i8, ptr [[START]], i64 [[INDEX]]
+; CHECK-VF8-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP6]], i32 0
 ; CHECK-VF8-NEXT:    store <8 x i8> zeroinitializer, ptr [[TMP17]], align 1
 ; CHECK-VF8-NEXT:    [[INDEX_NEXT7]] = add nuw i64 [[INDEX5]], 8
 ; CHECK-VF8-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT7]], 10000
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll
@@ -45,20 +45,13 @@
 ; CHECK-NEXT:    [[TMP20:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT6]], [[TMP19]]
 ; CHECK-NEXT:    [[VECTOR_GEP7:%.*]] = mul <vscale x 2 x i64> [[TMP20]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 8, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP7]]
-; CHECK-NEXT:    [[TMP22:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP23:%.*]] = mul i64 [[TMP22]], 8
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_2]], i64 [[TMP23]]
-; CHECK-NEXT:    [[TMP24:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP25:%.*]] = mul i64 [[TMP24]], 2
-; CHECK-NEXT:    [[TMP26:%.*]] = add i64 [[TMP25]], 0
-; CHECK-NEXT:    [[TMP27:%.*]] = add i64 [[INDEX]], [[TMP26]]
-; CHECK-NEXT:    [[TMP28:%.*]] = mul i64 [[TMP27]], 8
-; CHECK-NEXT:    [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[START_2]], i64 [[TMP28]]
-; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0
+; CHECK-NEXT:    [[TMP28:%.*]] = mul i64 [[INDEX]], 8
+; CHECK-NEXT:    [[NEXT_GEP8:%.*]] = getelementptr inbounds ptr, ptr [[START_2]], i64 [[TMP28]]
+; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[NEXT_GEP8]], i32 0
 ; CHECK-NEXT:    store <vscale x 2 x i64> zeroinitializer, ptr [[TMP29]], align 4
 ; CHECK-NEXT:    [[TMP30:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    [[TMP31:%.*]] = mul i32 [[TMP30]], 2
-; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 [[TMP31]]
+; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i64, ptr [[NEXT_GEP8]], i32 [[TMP31]]
 ; CHECK-NEXT:    store <vscale x 2 x i64> zeroinitializer, ptr [[TMP32]], align 4
 ; CHECK-NEXT:    [[TMP33:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP34:%.*]] = mul i64 [[TMP33]], 4
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
@@ -17,10 +17,10 @@
 ; CHECK-NEXT: vector loop: {
 ; CHECK-NEXT:   vector.body:
 ; CHECK-NEXT:     EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
-; CHECK-NEXT:     EMIT ir<%ptr.iv.1> = WIDEN-POINTER-INDUCTION ir<%start.1>, 1
 ; CHECK-NEXT:     EMIT ir<%ptr.iv.2> = WIDEN-POINTER-INDUCTION ir<%start.2>, 1
+; CHECK-NEXT:     CLONE vp<[[GEP:%.+]]> = getelementptr ir<%start.1>, vp<[[CAN_IV]]>
 ; CHECK-NEXT:     WIDEN-GEP Var[Inv] ir<%ptr.iv.2.next> = getelementptr ir<%ptr.iv.2>, ir<1>
-; CHECK-NEXT:     WIDEN store ir<%ptr.iv.1>, ir<%ptr.iv.2.next>
+; CHECK-NEXT:     WIDEN store vp<[[GEP]]>, ir<%ptr.iv.2.next>
 ; CHECK-NEXT:     WIDEN ir<%lv> = load ir<%ptr.iv.2>
 ; CHECK-NEXT:     WIDEN ir<%add> = add ir<%lv>, ir<1>
 ; CHECK-NEXT:     WIDEN store ir<%ptr.iv.2>, ir<%add>
@@ -51,8 +51,6 @@
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi i8* [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8*, i8** [[START_1]], i64 [[TMP4]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 2
 ; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP6]], 1
@@ -64,8 +62,9 @@
 ; CHECK-NEXT:    [[TMP11:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP10]]
 ; CHECK-NEXT:    [[VECTOR_GEP:%.*]] = mul <vscale x 2 x i64> [[TMP11]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i8, i8* [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP]]
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr inbounds i8*, i8** [[START_1]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, <vscale x 2 x i8*> [[TMP12]], i64 1
-; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i8*, i8** [[NEXT_GEP]], i32 0
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8*, i8** [[NEXT_GEP]], i32 0
 ; CHECK-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to <vscale x 2 x i8*>*
 ; CHECK-NEXT:    store <vscale x 2 x i8*> [[TMP13]], <vscale x 2 x i8*>* [[TMP15]], align 8
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <vscale x 2 x i8*> [[TMP12]], i32 0
@@ -144,32 +143,19 @@
 ; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, i8* [[START:%.*]], i64 [[N_VEC]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi i8* [ [[START]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[INDEX2:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 2
-; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP6]], 1
-; CHECK-NEXT:    [[TMP8:%.*]] = mul i64 1, [[TMP7]]
-; CHECK-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP6]], 0
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9]], i32 0
-; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP10:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
-; CHECK-NEXT:    [[TMP11:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP10]]
-; CHECK-NEXT:    [[VECTOR_GEP:%.*]] = mul <vscale x 2 x i64> [[TMP11]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i8, i8* [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP]]
-; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <vscale x 2 x i8*> [[TMP12]], i32 0
-; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i8, i8* [[TMP13]], i32 0
-; CHECK-NEXT:    [[TMP15:%.*]] = bitcast i8* [[TMP14]] to <vscale x 2 x i8>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP15]], align 1
-; CHECK-NEXT:    [[TMP16:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 1, i32 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i8* [[TMP14]] to <vscale x 2 x i8>*
-; CHECK-NEXT:    store <vscale x 2 x i8> [[TMP16]], <vscale x 2 x i8>* [[TMP17]], align 1
-; CHECK-NEXT:    [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP19:%.*]] = mul i64 [[TMP18]], 2
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX2]], [[TMP19]]
-; CHECK-NEXT:    [[PTR_IND]] = getelementptr i8, i8* [[POINTER_PHI]], i64 [[TMP8]]
-; CHECK-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[START]], i64 [[INDEX2]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to <vscale x 2 x i8>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP7]], align 1
+; CHECK-NEXT:    [[TMP8:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 1, i32 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP6]] to <vscale x 2 x i8>*
+; CHECK-NEXT:    store <vscale x 2 x i8> [[TMP8]], <vscale x 2 x i8>* [[TMP9]], align 1
+; CHECK-NEXT:    [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP11:%.*]] = mul i64 [[TMP10]], 2
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX2]], [[TMP11]]
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]]
@@ -181,8 +167,8 @@
 ; CHECK-NEXT:    [[PTR_PHI:%.*]] = phi i8* [ [[PTR_PHI_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[INDEX_NXT]] = add i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP21:%.*]] = load i8, i8* [[PTR_PHI]], align 1
-; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[TMP21]], 1
+; CHECK-NEXT:    [[TMP13:%.*]] = load i8, i8* [[PTR_PHI]], align 1
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[TMP13]], 1
 ; CHECK-NEXT:    store i8 [[ADD]], i8* [[PTR_PHI]], align 1
 ; CHECK-NEXT:    [[PTR_PHI_NEXT]] = getelementptr inbounds i8, i8* [[PTR_PHI]], i64 1
 ; CHECK-NEXT:    [[CMP_I_NOT:%.*]] = icmp eq i8* [[PTR_PHI_NEXT]], [[START]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
@@ -157,31 +157,31 @@
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i32, i32* [[SRC]], i64 [[INDEX]]
-; CHECK-NEXT:    [[NEXT_GEP5:%.*]] = getelementptr i32, i32* [[DST]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[NEXT_GEP]] to <vscale x 4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP4]], align 4
-; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT:    [[TMP6:%.*]] = shl nuw nsw i32 [[TMP5]], 2
-; CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i32, i32* [[NEXT_GEP]], i64 [[TMP7]]
-; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <vscale x 4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD7:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP9]], align 4
-; CHECK-NEXT:    [[TMP10:%.*]] = shl nsw <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP11:%.*]] = shl nsw <vscale x 4 x i32> [[WIDE_LOAD7]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i32* [[NEXT_GEP5]] to <vscale x 4 x i32>*
-; CHECK-NEXT:    store <vscale x 4 x i32> [[TMP10]], <vscale x 4 x i32>* [[TMP12]], align 4
-; CHECK-NEXT:    [[TMP13:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT:    [[TMP14:%.*]] = shl nuw nsw i32 [[TMP13]], 2
-; CHECK-NEXT:    [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
-; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr i32, i32* [[NEXT_GEP5]], i64 [[TMP15]]
-; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <vscale x 4 x i32>*
-; CHECK-NEXT:    store <vscale x 4 x i32> [[TMP11]], <vscale x 4 x i32>* [[TMP17]], align 4
-; CHECK-NEXT:    [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP19:%.*]] = shl nuw nsw i64 [[TMP18]], 3
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]]
-; CHECK-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <vscale x 4 x i32>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP6]], align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    [[TMP8:%.*]] = shl nuw nsw i32 [[TMP7]], 2
+; CHECK-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i64 [[TMP9]]
+; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <vscale x 4 x i32>*
+; CHECK-NEXT:    [[WIDE_LOAD4:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP11]], align 4
+; CHECK-NEXT:    [[TMP12:%.*]] = shl nsw <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP13:%.*]] = shl nsw <vscale x 4 x i32> [[WIDE_LOAD4]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP14:%.*]] = bitcast i32* [[TMP4]] to <vscale x 4 x i32>*
+; CHECK-NEXT:    store <vscale x 4 x i32> [[TMP12]], <vscale x 4 x i32>* [[TMP14]], align 4
+; CHECK-NEXT:    [[TMP15:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    [[TMP16:%.*]] = shl nuw nsw i32 [[TMP15]], 2
+; CHECK-NEXT:    [[TMP17:%.*]] = zext i32 [[TMP16]] to i64
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i64 [[TMP17]]
+; CHECK-NEXT:    [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <vscale x 4 x i32>*
+; CHECK-NEXT:    store <vscale x 4 x i32> [[TMP13]], <vscale x 4 x i32>* [[TMP19]], align 4
+; CHECK-NEXT:    [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP21:%.*]] = shl nuw nsw i64 [[TMP20]], 3
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP21]]
+; CHECK-NEXT:    [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
@@ -194,8 +194,8 @@
 ; CHECK-NEXT:    [[I_011:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[S_010:%.*]] = phi i32* [ [[INCDEC_PTR1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[D_09:%.*]] = phi i32* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[TMP21:%.*]] = load i32, i32* [[S_010]], align 4
-; CHECK-NEXT:    [[MUL:%.*]] = shl nsw i32 [[TMP21]], 1
+; CHECK-NEXT:    [[TMP23:%.*]] = load i32, i32* [[S_010]], align 4
+; CHECK-NEXT:    [[MUL:%.*]] = shl nsw i32 [[TMP23]], 1
 ; CHECK-NEXT:    store i32 [[MUL]], i32* [[D_09]], align 4
 ; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i32, i32* [[D_09]], i64 1
 ; CHECK-NEXT:    [[INCDEC_PTR1]] = getelementptr inbounds i32, i32* [[S_010]], i64 1
@@ -252,33 +252,33 @@
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi i32* [ [[A]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <vscale x 2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <vscale x 2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 1
 ; CHECK-NEXT:    [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i32, i32* [[POINTER_PHI]], <vscale x 2 x i64> [[TMP6]]
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i32*, i32** [[B]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32*, i32** [[B]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[BC:%.*]] = bitcast <vscale x 2 x i32*> [[TMP7]] to <vscale x 2 x <vscale x 2 x i32>*>
-; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <vscale x 2 x <vscale x 2 x i32>*> [[BC]], i64 0
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP8]], align 8
-; CHECK-NEXT:    [[TMP9]] = add <vscale x 2 x i32> [[WIDE_LOAD]], [[VEC_PHI]]
-; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i32** [[NEXT_GEP]] to <vscale x 2 x i32*>*
-; CHECK-NEXT:    store <vscale x 2 x i32*> [[TMP7]], <vscale x 2 x i32*>* [[TMP10]], align 8
-; CHECK-NEXT:    [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP12:%.*]] = shl nuw nsw i64 [[TMP11]], 1
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]]
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 2 x <vscale x 2 x i32>*> [[BC]], i64 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP9]], align 8
+; CHECK-NEXT:    [[TMP10]] = add <vscale x 2 x i32> [[WIDE_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i32** [[TMP8]] to <vscale x 2 x i32*>*
+; CHECK-NEXT:    store <vscale x 2 x i32*> [[TMP7]], <vscale x 2 x i32*>* [[TMP11]], align 8
+; CHECK-NEXT:    [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP13:%.*]] = shl nuw nsw i64 [[TMP12]], 1
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]]
 ; CHECK-NEXT:    [[PTR_IND]] = getelementptr i32, i32* [[POINTER_PHI]], i64 [[TMP5]]
-; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[TMP14:%.*]] = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> [[TMP9]])
+; CHECK-NEXT:    [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> [[TMP10]])
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[A]], [[ENTRY]] ]
 ; CHECK-NEXT:    [[BC_RESUME_VAL3:%.*]] = phi i32** [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ]
-; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP15]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
@@ -294,7 +294,7 @@
 ; CHECK-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK:       for.end:
-; CHECK-NEXT:    [[VAR5:%.*]] = phi i32 [ [[VAR2]], [[FOR_BODY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[VAR5:%.*]] = phi i32 [ [[VAR2]], [[FOR_BODY]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    ret i32 [[VAR5]]
 ;
 entry:
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-qabs.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-qabs.ll
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-qabs.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-qabs.ll
@@ -25,8 +25,8 @@
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[PSRC]], i32 [[INDEX]]
-; CHECK-NEXT:    [[NEXT_GEP7:%.*]] = getelementptr i8, i8* [[PDST]], i32 [[INDEX]]
+; CHECK-NEXT:    [[NEXT_GEP7:%.*]] = getelementptr inbounds i8, i8* [[PDST]], i32 [[INDEX]]
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr inbounds i8, i8* [[PSRC]], i32 [[INDEX]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[NEXT_GEP]] to <16 x i8>*
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <16 x i8> [[WIDE_LOAD]], zeroinitializer
@@ -118,8 +118,8 @@
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i16, i16* [[PSRC]], i32 [[INDEX]]
-; CHECK-NEXT:    [[NEXT_GEP7:%.*]] = getelementptr i16, i16* [[PDST]], i32 [[INDEX]]
+; CHECK-NEXT:    [[NEXT_GEP7:%.*]] = getelementptr inbounds i16, i16* [[PDST]], i32 [[INDEX]]
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr inbounds i16, i16* [[PSRC]], i32 [[INDEX]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[NEXT_GEP]] to <8 x i16>*
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 2
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <8 x i16> [[WIDE_LOAD]], zeroinitializer
@@ -211,8 +211,8 @@
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i32, i32* [[PSRC]], i32 [[INDEX]]
-; CHECK-NEXT:    [[NEXT_GEP7:%.*]] = getelementptr i32, i32* [[PDST]], i32 [[INDEX]]
+; CHECK-NEXT:    [[NEXT_GEP7:%.*]] = getelementptr inbounds i32, i32* [[PDST]], i32 [[INDEX]]
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr inbounds i32, i32* [[PSRC]], i32 [[INDEX]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[NEXT_GEP]] to <4 x i32>*
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], zeroinitializer
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-saddsatcost.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-saddsatcost.ll
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-saddsatcost.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-saddsatcost.ll
@@ -25,8 +25,8 @@
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i16, i16* [[PSRC:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[NEXT_GEP5:%.*]] = getelementptr i16, i16* [[PDST:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[NEXT_GEP5:%.*]] = getelementptr inbounds i16, i16* [[PDST:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr inbounds i16, i16* [[PSRC:%.*]], i32 [[INDEX]]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 [[INDEX]], i32 [[BLOCKSIZE]])
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[NEXT_GEP]] to <8 x i16>*
 ; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* [[TMP0]], i32 2, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> poison)
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll
@@ -28,8 +28,8 @@
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr float, float* [[PA]], i32 [[INDEX]]
-; CHECK-NEXT:    [[NEXT_GEP5:%.*]] = getelementptr float, float* [[PB]], i32 [[INDEX]]
+; CHECK-NEXT:    [[NEXT_GEP5:%.*]] = getelementptr inbounds float, float* [[PB]], i32 [[INDEX]]
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr inbounds float, float* [[PA]], i32 [[INDEX]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[NEXT_GEP]] to <4 x float>*
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[NEXT_GEP5]] to <4 x float>*
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll b/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll
--- a/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll
@@ -12,8 +12,8 @@
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i32, i32* [[A:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i32, i32* [[B:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDEX]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[NEXT_GEP]] to <4 x i32>*
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
@@ -53,9 +53,9 @@
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[INDEX]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = shl i32 [[INDEX]], 1
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i32, i32* [[A]], i32 [[TMP0]]
-; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i32, i32* [[B]], i32 [[INDEX]]
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP0]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[NEXT_GEP]] to <8 x i32>*
 ; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <8 x i32>, <8 x i32>* [[TMP1]], align 4
 ; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -110,7 +110,7 @@
 ; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi i32* [ [[A]], [[ENTRY:%.*]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i32, i32* [[POINTER_PHI]], <4 x i32> <i32 0, i32 3, i32 6, i32 9>
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i32, i32* [[B]], i32 [[INDEX]]
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP0]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
 ; CHECK-NEXT:    [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[NEXT_GEP]] to <4 x i32>*
@@ -161,8 +161,8 @@
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i16, i16* [[A:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i16, i16* [[B:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr inbounds i16, i16* [[B:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr inbounds i16, i16* [[A:%.*]], i32 [[INDEX]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[NEXT_GEP]] to <8 x i16>*
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 2
 ; CHECK-NEXT:    [[TMP2:%.*]] = add <8 x i16> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
@@ -204,9 +204,9 @@
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr inbounds i16, i16* [[B]], i32 [[INDEX]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[INDEX]], 1
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i16, i16* [[A]], i32 [[TMP1]]
-; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i16, i16* [[B]], i32 [[INDEX]]
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr inbounds i16, i16* [[A]], i32 [[TMP1]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16* [[NEXT_GEP]] to <16 x i16>*
 ; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <16 x i16>, <16 x i16>* [[TMP2]], align 2
 ; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x i16> [[WIDE_VEC]], <16 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
@@ -300,8 +300,8 @@
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[A]], i32 [[INDEX]]
-; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, i8* [[B]], i32 [[INDEX]]
+; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr inbounds i8, i8* [[B]], i32 [[INDEX]]
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr inbounds i8, i8* [[A]], i32 [[INDEX]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[NEXT_GEP]] to <16 x i8>*
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = add <16 x i8> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
@@ -356,9 +356,9 @@
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr inbounds i8, i8* [[B]], i32 [[INDEX]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[INDEX]], 1
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[A]], i32 [[TMP1]]
-; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, i8* [[B]], i32 [[INDEX]]
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr inbounds i8, i8* [[A]], i32 [[TMP1]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[NEXT_GEP]] to <32 x i8>*
 ; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <32 x i8>, <32 x i8>* [[TMP2]], align 1
 ; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
@@ -451,8 +451,8 @@
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr float, float* [[A:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr float, float* [[B:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i32 [[INDEX]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[NEXT_GEP]] to <4 x float>*
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
@@ -492,9 +492,9 @@
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr inbounds float, float* [[B]], i32 [[INDEX]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = shl i32 [[INDEX]], 1
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr float, float* [[A]], i32 [[TMP0]]
-; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr float, float* [[B]], i32 [[INDEX]]
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr inbounds float, float* [[A]], i32 [[TMP0]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[NEXT_GEP]] to <8 x float>*
 ; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <8 x float>, <8 x float>* [[TMP1]], align 4
 ; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <8 x float> [[WIDE_VEC]], <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -549,7 +549,7 @@
 ; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi float* [ [[A]], [[ENTRY:%.*]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr float, float* [[POINTER_PHI]], <4 x i32> <i32 0, i32 3, i32 6, i32 9>
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr float, float* [[B]], i32 [[INDEX]]
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr inbounds float, float* [[B]], i32 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> [[TMP0]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> poison)
 ; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast <4 x float> [[WIDE_MASKED_GATHER]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[NEXT_GEP]] to <4 x float>*
@@ -599,8 +599,8 @@
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr half, half* [[A:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr half, half* [[B:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr inbounds half, half* [[B:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr inbounds half, half* [[A:%.*]], i32 [[INDEX]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast half* [[NEXT_GEP]] to <8 x half>*
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x half>, <8 x half>* [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast <8 x half> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
@@ -640,9 +640,9 @@
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr inbounds half, half* [[B]], i32 [[INDEX]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = shl i32 [[INDEX]], 1
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr half, half* [[A]], i32 [[TMP0]]
-; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr half, half* [[B]], i32 [[INDEX]]
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr inbounds half, half* [[A]], i32 [[TMP0]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast half* [[NEXT_GEP]] to <16 x half>*
 ; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <16 x half>, <16 x half>* [[TMP1]], align 4
 ; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x half> [[WIDE_VEC]], <16 x half> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
@@ -695,9 +695,9 @@
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr inbounds half, half* [[B]], i32 [[INDEX]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = mul i32 [[INDEX]], 3
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr half, half* [[A]], i32 [[TMP0]]
-; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr half, half* [[B]], i32 [[INDEX]]
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr inbounds half, half* [[A]], i32 [[TMP0]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast half* [[NEXT_GEP]] to <24 x half>*
 ; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <24 x half>, <24 x half>* [[TMP1]], align 4
 ; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <24 x half> [[WIDE_VEC]], <24 x half> poison, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
@@ -758,14 +758,14 @@
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i32, i32* [[POINTER_PHI]], <4 x i32>
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, i32* [[POINTER_PHI]], <4 x i32>
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i32, i32* [[B]], i32 [[INDEX]]
+; CHECK-NEXT:
[[NEXT_GEP:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[INDEX]] ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP0]], i32 4, <4 x i1> , <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP1]], i32 4, <4 x i1> , <4 x i32> poison) ; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER5]], [[BROADCAST_SPLAT7]] ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[NEXT_GEP]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* [[TMP4]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, i32* [[NEXT_GEP]], i32 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[NEXT_GEP]], i32 4 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP6]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 @@ -832,7 +832,7 @@ ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[POINTER_PHI]], <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[POINTER_PHI]], <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, i32* [[POINTER_PHI]], <4 x i32> -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i32, i32* [[B]], i32 [[INDEX]] +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[INDEX]] ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP0]], i32 4, <4 x i1> , <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_GATHER7:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP1]], i32 4, <4 x i1> , <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_GATHER8:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP2]], i32 4, <4 x i1> , <4 x i32> poison) @@ -843,13 +843,13 @@ ; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER9]], [[BROADCAST_SPLAT15]] ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[NEXT_GEP]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP8]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, i32* [[NEXT_GEP]], i32 4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[NEXT_GEP]], i32 4 ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP10]], align 4 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[NEXT_GEP]], i32 8 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[NEXT_GEP]], i32 8 ; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[TMP12]], align 4 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[NEXT_GEP]], i32 12 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[NEXT_GEP]], i32 12 ; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP14]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-scalar-epilogue-fallback.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-scalar-epilogue-fallback.ll --- a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-scalar-epilogue-fallback.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-scalar-epilogue-fallback.ll @@ -23,13 +23,12 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; 
CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[PTR]], i32 [[TMP0]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP]], i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>* ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, <16 x i8>* [[TMP3]], align 1 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[NEXT_GEP]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <16 x i8>* ; CHECK-NEXT: store <16 x i8> [[WIDE_LOAD]], <16 x i8>* [[TMP5]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/illegal-type.ll b/llvm/test/Transforms/LoopVectorize/RISCV/illegal-type.ll --- a/llvm/test/Transforms/LoopVectorize/RISCV/illegal-type.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/illegal-type.ll @@ -4,7 +4,7 @@ ; define dso_local void @loop_i128(i128* nocapture %ptr, i64 %N) { -; CHECK-LABEL: @loop_i128( +; CHECK-LABEL: define {{[^@]+}}@loop_i128( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: @@ -37,7 +37,7 @@ } define dso_local void @loop_f128(fp128* nocapture %ptr, i64 %N) { -; CHECK-LABEL: @loop_f128( +; CHECK-LABEL: define {{[^@]+}}@loop_f128( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: @@ -70,7 +70,7 @@ } define dso_local void @loop_invariant_i128(i128* nocapture %ptr, i128 %val, i64 %N) { -; CHECK-LABEL: @loop_invariant_i128( +; CHECK-LABEL: define {{[^@]+}}@loop_invariant_i128( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: @@ -99,7 +99,7 @@ } define void @uniform_store_i1(i1* noalias %dst, i64* noalias %start, i64 %N) { -; CHECK-LABEL: @uniform_store_i1( +; CHECK-LABEL: define {{[^@]+}}@uniform_store_i1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 @@ -114,27 +114,34 @@ ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x i64*> [[BROADCAST_SPLATINSERT3]], <2 x i64*> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i64* [ [[START]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, i64* [[POINTER_PHI]], <2 x i64> <i64 0, i64 1> -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, i64* [[POINTER_PHI]], <2 x i64> <i64 2, i64 3> -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64*> [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, i64* [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64* [[TMP4]] to <2 x i64>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP5]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, i64* [[TMP3]], i32 2 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64* [[TMP6]] to <2 x i64>* -; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP7]], align 4 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, <2 x i64*> [[TMP1]], i64 1 -;
CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, <2 x i64*> [[TMP2]], i64 1 -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <2 x i64*> [[TMP8]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <2 x i64*> [[TMP9]], [[BROADCAST_SPLAT4]] -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP11]], i32 1 -; CHECK-NEXT: store i1 [[TMP12]], i1* [[DST:%.*]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, i64* [[START]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, i64* [[START]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i64*> poison, i64* [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64*> [[TMP7]], i64* [[TMP6]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, i64* [[START]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, i64* [[START]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64*> poison, i64* [[TMP9]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i64*> [[TMP11]], i64* [[TMP10]], i32 1 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, i64* [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64* [[TMP13]] to <2 x i64>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP14]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, i64* [[TMP5]], i32 2 +; CHECK-NEXT: [[TMP16:%.*]] = bitcast i64* [[TMP15]] to <2 x i64>* +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP16]], align 4 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, <2 x i64*> [[TMP8]], i64 1 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, <2 x i64*> [[TMP12]], i64 1 +; CHECK-NEXT: [[TMP19:%.*]] = icmp eq <2 x i64*> [[TMP17]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP20:%.*]] = icmp eq <2 x i64*> [[TMP18]], [[BROADCAST_SPLAT4]] +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x i1> [[TMP20]], i32 1 +; CHECK-NEXT: store i1 [[TMP21]], i1* [[DST:%.*]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[PTR_IND]] = getelementptr i64, i64* [[POINTER_PHI]], i64 4 -; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]] @@ -146,7 +153,7 @@ ; CHECK-NEXT: [[FIRST_SROA:%.*]] = phi i64* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[TMP14:%.*]] = load i64, i64* [[FIRST_SROA]], align 4 +; CHECK-NEXT: [[TMP23:%.*]] = load i64, i64* [[FIRST_SROA]], align 4 ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i64, i64* [[FIRST_SROA]], i64 1 ; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i64* [[INCDEC_PTR]], [[START]] ; CHECK-NEXT: store i1 [[CMP_NOT]], i1* [[DST]], align 1 @@ -174,7 +181,7 @@ } define dso_local void 
@loop_fixed_width_i128(i128* nocapture %ptr, i64 %N) { -; CHECK-LABEL: @loop_fixed_width_i128( +; CHECK-LABEL: define {{[^@]+}}@loop_fixed_width_i128( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: diff --git a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll --- a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll @@ -900,18 +900,17 @@ ; AVX512: vector.body: ; AVX512-NEXT: [[POINTER_PHI:%.*]] = phi float* [ [[DEST]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; AVX512-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 0 -; AVX512-NEXT: [[NEXT_GEP:%.*]] = getelementptr float, float* [[PTR]], i64 [[TMP13]] -; AVX512-NEXT: [[TMP14:%.*]] = getelementptr float, float* [[POINTER_PHI]], <16 x i64> -; AVX512-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP]], i64 [[IDXPROM]] +; AVX512-NEXT: [[TMP13:%.*]] = getelementptr float, float* [[POINTER_PHI]], <16 x i64> +; AVX512-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 [[INDEX]] +; AVX512-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[IDXPROM]] ; AVX512-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, float* [[TMP15]], i32 0 ; AVX512-NEXT: [[TMP17:%.*]] = bitcast float* [[TMP16]] to <16 x float>* ; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <16 x float>, <16 x float>* [[TMP17]], align 4, !alias.scope !14 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD]], <16 x float*> [[TMP14]], i32 4, <16 x i1> ), !alias.scope !17, !noalias !19 -; AVX512-NEXT: [[TMP18:%.*]] = getelementptr float, float* [[NEXT_GEP]], i32 0 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD]], <16 x float*> [[TMP13]], i32 4, <16 x i1> ), !alias.scope !17, !noalias !19 +; AVX512-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, float* [[TMP14]], i32 0 ; AVX512-NEXT: [[TMP19:%.*]] = bitcast float* [[TMP18]] to <16 x float>* ; AVX512-NEXT: [[WIDE_LOAD15:%.*]] = load <16 x float>, <16 x float>* [[TMP19]], align 4, !alias.scope !21 -; AVX512-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP14]], i64 1 +; AVX512-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP13]], i64 1 ; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15]], <16 x float*> [[TMP20]], i32 4, <16 x i1> ), !alias.scope !17, !noalias !19 ; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; AVX512-NEXT: [[PTR_IND]] = getelementptr float, float* [[POINTER_PHI]], i64 256 @@ -991,30 +990,28 @@ ; FVW2-NEXT: br label [[VECTOR_BODY:%.*]] ; FVW2: vector.body: ; FVW2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; FVW2-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 0 -; FVW2-NEXT: [[NEXT_GEP:%.*]] = getelementptr float, float* [[PTR]], i64 [[TMP13]] -; FVW2-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 0 -; FVW2-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 16 -; FVW2-NEXT: [[NEXT_GEP15:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP15]] -; FVW2-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 1 -; FVW2-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 16 -; FVW2-NEXT: [[NEXT_GEP16:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP17]] -; FVW2-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, float* 
[[NEXT_GEP]], i64 [[IDXPROM]] +; FVW2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 16 +; FVW2-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 0 +; FVW2-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 16 +; FVW2-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, float* [[DEST]], i64 [[TMP13]] +; FVW2-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, float* [[DEST]], i64 [[TMP14]] +; FVW2-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 [[INDEX]] +; FVW2-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM]] ; FVW2-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, float* [[TMP18]], i32 0 ; FVW2-NEXT: [[TMP20:%.*]] = bitcast float* [[TMP19]] to <2 x float>* ; FVW2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP20]], align 4, !alias.scope !14 ; FVW2-NEXT: [[TMP21:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 0 -; FVW2-NEXT: store float [[TMP21]], float* [[NEXT_GEP15]], align 4, !alias.scope !17, !noalias !19 +; FVW2-NEXT: store float [[TMP21]], float* [[TMP15]], align 4, !alias.scope !17, !noalias !19 ; FVW2-NEXT: [[TMP22:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 1 -; FVW2-NEXT: store float [[TMP22]], float* [[NEXT_GEP16]], align 4, !alias.scope !17, !noalias !19 -; FVW2-NEXT: [[TMP23:%.*]] = getelementptr float, float* [[NEXT_GEP]], i32 0 +; FVW2-NEXT: store float [[TMP22]], float* [[TMP16]], align 4, !alias.scope !17, !noalias !19 +; FVW2-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, float* [[TMP17]], i32 0 ; FVW2-NEXT: [[TMP24:%.*]] = bitcast float* [[TMP23]] to <2 x float>* -; FVW2-NEXT: [[WIDE_LOAD17:%.*]] = load <2 x float>, <2 x float>* [[TMP24]], align 4, !alias.scope !21 -; FVW2-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP15]], i64 1 -; FVW2-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP16]], i64 1 -; FVW2-NEXT: [[TMP27:%.*]] = extractelement <2 x float> [[WIDE_LOAD17]], i32 0 +; FVW2-NEXT: [[WIDE_LOAD15:%.*]] = load <2 x float>, <2 x float>* [[TMP24]], align 4, !alias.scope !21 +; FVW2-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, float* [[TMP15]], i64 1 +; FVW2-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 1 +; FVW2-NEXT: [[TMP27:%.*]] = extractelement <2 x float> [[WIDE_LOAD15]], i32 0 ; FVW2-NEXT: store float [[TMP27]], float* [[TMP25]], align 4, !alias.scope !17, !noalias !19 -; FVW2-NEXT: [[TMP28:%.*]] = extractelement <2 x float> [[WIDE_LOAD17]], i32 1 +; FVW2-NEXT: [[TMP28:%.*]] = extractelement <2 x float> [[WIDE_LOAD15]], i32 1 ; FVW2-NEXT: store float [[TMP28]], float* [[TMP26]], align 4, !alias.scope !17, !noalias !19 ; FVW2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; FVW2-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleave-opaque-pointers.ll b/llvm/test/Transforms/LoopVectorize/X86/interleave-opaque-pointers.ll --- a/llvm/test/Transforms/LoopVectorize/X86/interleave-opaque-pointers.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/interleave-opaque-pointers.ll @@ -7,7 +7,7 @@ %pair = type { ptr, ptr } define void @test_pr55375_interleave_opaque_ptr(ptr %start, ptr %end) { -; CHECK-LABEL: @test_pr55375_interleave_opaque_ptr( +; CHECK-LABEL: define {{[^@]+}}@test_pr55375_interleave_opaque_ptr( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[START2:%.*]] = ptrtoint ptr [[START:%.*]] to i64 ; CHECK-NEXT: [[END1:%.*]] = ptrtoint ptr [[END:%.*]] to i64 @@ -25,15 +25,14 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; 
CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 16 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 16 -; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP8]] -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x ptr> [[TMP9]], ptr [[NEXT_GEP3]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr ptr, ptr [[NEXT_GEP]], i32 0 +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 16 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 16 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[START]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[PAIR]], ptr [[START]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x ptr> [[TMP9]], ptr [[TMP8]], i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds ptr, ptr [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x ptr> zeroinitializer, <2 x ptr> [[TMP10]], <4 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x ptr> [[TMP12]], <4 x ptr> poison, <4 x i32> ; CHECK-NEXT: store <4 x ptr> [[INTERLEAVED_VEC]], ptr [[TMP11]], align 8 @@ -48,7 +47,7 @@ ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[IV_1:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[IV]], i64 0, i32 1 +; CHECK-NEXT: [[IV_1:%.*]] = getelementptr inbounds [[PAIR]], ptr [[IV]], i64 0, i32 1 ; CHECK-NEXT: store ptr [[IV]], ptr [[IV_1]], align 8 ; CHECK-NEXT: store ptr null, ptr [[IV]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = getelementptr inbounds [[PAIR]], ptr [[IV]], i64 1 diff --git a/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll b/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll --- a/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll @@ -40,17 +40,17 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i16, i16* [[PSRC:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i16, i16* [[PDST:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr inbounds i16, i16* [[PDST:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr inbounds i16, i16* [[PSRC:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[NEXT_GEP]] to <16 x i16>* ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i16>, <16 x i16>* [[TMP3]], align 2 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i16, i16* [[NEXT_GEP]], i64 16 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[NEXT_GEP]], i64 16 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <16 x i16>* ; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <16 x i16>, <16 x i16>* [[TMP5]], align 2 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i16, i16* [[NEXT_GEP]], i64 32 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[NEXT_GEP]], i64 32 ; 
CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <16 x i16>* ; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <16 x i16>, <16 x i16>* [[TMP7]], align 2 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[NEXT_GEP]], i64 48 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[NEXT_GEP]], i64 48 ; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP8]] to <16 x i16>* ; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <16 x i16>, <16 x i16>* [[TMP9]], align 2 ; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> [[WIDE_LOAD]], <16 x i16> [[BROADCAST_SPLAT]]) @@ -59,13 +59,13 @@ ; CHECK-NEXT: [[TMP13:%.*]] = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> [[WIDE_LOAD11]], <16 x i16> [[BROADCAST_SPLAT17]]) ; CHECK-NEXT: [[TMP14:%.*]] = bitcast i16* [[NEXT_GEP5]] to <16 x i16>* ; CHECK-NEXT: store <16 x i16> [[TMP10]], <16 x i16>* [[TMP14]], align 2 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i16, i16* [[NEXT_GEP5]], i64 16 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i16, i16* [[NEXT_GEP5]], i64 16 ; CHECK-NEXT: [[TMP16:%.*]] = bitcast i16* [[TMP15]] to <16 x i16>* ; CHECK-NEXT: store <16 x i16> [[TMP11]], <16 x i16>* [[TMP16]], align 2 -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i16, i16* [[NEXT_GEP5]], i64 32 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, i16* [[NEXT_GEP5]], i64 32 ; CHECK-NEXT: [[TMP18:%.*]] = bitcast i16* [[TMP17]] to <16 x i16>* ; CHECK-NEXT: store <16 x i16> [[TMP12]], <16 x i16>* [[TMP18]], align 2 -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i16, i16* [[NEXT_GEP5]], i64 48 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i16, i16* [[NEXT_GEP5]], i64 48 ; CHECK-NEXT: [[TMP20:%.*]] = bitcast i16* [[TMP19]] to <16 x i16>* ; CHECK-NEXT: store <16 x i16> [[TMP13]], <16 x i16>* [[TMP20]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 64 @@ -94,8 +94,8 @@ ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX29:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT35:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[NEXT_GEP30:%.*]] = getelementptr i16, i16* [[PSRC]], i64 [[INDEX29]] -; CHECK-NEXT: [[NEXT_GEP31:%.*]] = getelementptr i16, i16* [[PDST]], i64 [[INDEX29]] +; CHECK-NEXT: [[NEXT_GEP31:%.*]] = getelementptr inbounds i16, i16* [[PDST]], i64 [[INDEX29]] +; CHECK-NEXT: [[NEXT_GEP30:%.*]] = getelementptr inbounds i16, i16* [[PSRC]], i64 [[INDEX29]] ; CHECK-NEXT: [[TMP22:%.*]] = bitcast i16* [[NEXT_GEP30]] to <8 x i16>* ; CHECK-NEXT: [[WIDE_LOAD32:%.*]] = load <8 x i16>, <8 x i16>* [[TMP22]], align 2 ; CHECK-NEXT: [[TMP23:%.*]] = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> [[WIDE_LOAD32]], <8 x i16> [[BROADCAST_SPLAT34]]) @@ -183,17 +183,17 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[PSRC:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, i8* [[PDST:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr inbounds i8, i8* [[PDST:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr inbounds i8, i8* [[PSRC:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[NEXT_GEP]] to <32 x i8>* ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, <32 x i8>* [[TMP3]], align 2 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[NEXT_GEP]], i64 32 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, 
i8* [[NEXT_GEP]], i64 32 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <32 x i8>* ; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <32 x i8>, <32 x i8>* [[TMP5]], align 2 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[NEXT_GEP]], i64 64 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP]], i64 64 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to <32 x i8>* ; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <32 x i8>, <32 x i8>* [[TMP7]], align 2 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[NEXT_GEP]], i64 96 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP]], i64 96 ; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to <32 x i8>* ; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <32 x i8>, <32 x i8>* [[TMP9]], align 2 ; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> [[WIDE_LOAD]], <32 x i8> [[WIDE_LOAD]], <32 x i8> [[BROADCAST_SPLAT]]) @@ -202,13 +202,13 @@ ; CHECK-NEXT: [[TMP13:%.*]] = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> [[WIDE_LOAD11]], <32 x i8> [[WIDE_LOAD11]], <32 x i8> [[BROADCAST_SPLAT17]]) ; CHECK-NEXT: [[TMP14:%.*]] = bitcast i8* [[NEXT_GEP5]] to <32 x i8>* ; CHECK-NEXT: store <32 x i8> [[TMP10]], <32 x i8>* [[TMP14]], align 2 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, i8* [[NEXT_GEP5]], i64 32 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP5]], i64 32 ; CHECK-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP15]] to <32 x i8>* ; CHECK-NEXT: store <32 x i8> [[TMP11]], <32 x i8>* [[TMP16]], align 2 -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, i8* [[NEXT_GEP5]], i64 64 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP5]], i64 64 ; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to <32 x i8>* ; CHECK-NEXT: store <32 x i8> [[TMP12]], <32 x i8>* [[TMP18]], align 2 -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, i8* [[NEXT_GEP5]], i64 96 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP5]], i64 96 ; CHECK-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to <32 x i8>* ; CHECK-NEXT: store <32 x i8> [[TMP13]], <32 x i8>* [[TMP20]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 128 @@ -237,8 +237,8 @@ ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX29:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT35:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[NEXT_GEP30:%.*]] = getelementptr i8, i8* [[PSRC]], i64 [[INDEX29]] -; CHECK-NEXT: [[NEXT_GEP31:%.*]] = getelementptr i8, i8* [[PDST]], i64 [[INDEX29]] +; CHECK-NEXT: [[NEXT_GEP31:%.*]] = getelementptr inbounds i8, i8* [[PDST]], i64 [[INDEX29]] +; CHECK-NEXT: [[NEXT_GEP30:%.*]] = getelementptr inbounds i8, i8* [[PSRC]], i64 [[INDEX29]] ; CHECK-NEXT: [[TMP22:%.*]] = bitcast i8* [[NEXT_GEP30]] to <16 x i8>* ; CHECK-NEXT: [[WIDE_LOAD32:%.*]] = load <16 x i8>, <16 x i8>* [[TMP22]], align 2 ; CHECK-NEXT: [[TMP23:%.*]] = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> [[WIDE_LOAD32]], <16 x i8> [[WIDE_LOAD32]], <16 x i8> [[BROADCAST_SPLAT34]]) diff --git a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll --- a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll @@ -276,56 +276,56 @@ ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: 
[[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE19:%.*]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT12:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT13:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT12]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[VEC_IV:%.*]] = or <4 x i64> [[BROADCAST_SPLAT13]], <i64 0, i64 1, i64 2, i64 3> +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE12:%.*]] ] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT5]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[VEC_IV:%.*]] = or <4 x i64> [[BROADCAST_SPLAT6]], <i64 0, i64 1, i64 2, i64 3> ; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP4]], i64 0 ; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i32, i32* [[Q:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[NEXT_GEP8]], align 16 -; CHECK-NEXT: store i32 [[TMP6]], i32* [[NEXT_GEP]], align 16 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[Q:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 16 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 [[INDEX]] +; CHECK-NEXT: store i32 [[TMP7]], i32* [[TMP8]], align 16 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP4]], i64 1 -; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15:%.*]] -; CHECK: pred.store.if14: -; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[INDEX]], 1 -; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP8]] -; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[INDEX]], 1 -; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i32, i32* [[Q]], i64 [[TMP9]] -; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[NEXT_GEP9]], align 16 -; CHECK-NEXT: store i32 [[TMP10]], i32* [[NEXT_GEP5]], align 16 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE15]] -; CHECK: pred.store.continue15: -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP4]], i64 2 -; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF16:%.*]], label [[PRED_STORE_CONTINUE17:%.*]] -; CHECK: pred.store.if16: -; CHECK-NEXT: [[TMP12:%.*]] = or i64 [[INDEX]], 2 -; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP12]] -; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[INDEX]], 2 -; CHECK-NEXT: [[NEXT_GEP10:%.*]] = getelementptr i32, i32* [[Q]], i64 [[TMP13]] -; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[NEXT_GEP10]], align 16 -; CHECK-NEXT: store i32 [[TMP14]], i32* [[NEXT_GEP6]], align 16 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE17]] -; CHECK: pred.store.continue17: -; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP4]], i64 3 -; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF18:%.*]], label [[PRED_STORE_CONTINUE19]] -; CHECK: pred.store.if18: -; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[INDEX]], 3 -; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP16]] -; CHECK-NEXT: [[TMP17:%.*]] = or i64 [[INDEX]], 3 -; CHECK-NEXT: [[NEXT_GEP11:%.*]] = getelementptr i32, i32* [[Q]], i64 [[TMP17]] -; CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* [[NEXT_GEP11]], align 16 -; CHECK-NEXT: store i32 [[TMP18]], i32* [[NEXT_GEP7]], align 16 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE19]] -; CHECK: pred.store.continue19: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP4]], i64 1 +; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] +; CHECK: pred.store.if7: +; CHECK-NEXT: [[TMP10:%.*]] = or i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[Q]], i64 [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 [[TMP13]] +; CHECK-NEXT: store i32 [[TMP12]], i32* [[TMP14]], align 16 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]] +; CHECK: pred.store.continue8: +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP4]], i64 2 +; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]] +; CHECK: pred.store.if9: +; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[Q]], i64 [[TMP16]] +; CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 16 +; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 [[TMP19]] +; CHECK-NEXT: store i32 [[TMP18]], i32* [[TMP20]], align 16 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE10]] +; CHECK: pred.store.continue10: +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP4]], i64 3 +; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12]] +; CHECK: pred.store.if11: +; CHECK-NEXT: [[TMP22:%.*]] = or i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[Q]], i64 [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 16 +; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 [[TMP25]] +; CHECK-NEXT: store i32 [[TMP24]], i32* [[TMP26]], align 16 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]] +; CHECK: pred.store.continue12: ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -405,24 +405,24 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i16, i16* [[SRC:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i32, i32* [[DST:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[NEXT_GEP]] to <4 x i16>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 2 -; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[WIDE_LOAD]] to <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw <4 x i32> [[TMP2]], <i32 7, i32 7, i32 7, i32 7> -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[NEXT_GEP4]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, i16* [[SRC:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <4 x i16>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP3]], align 2 +; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i16> [[WIDE_LOAD]] to <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw <4 x i32> [[TMP4]], <i32 7, i32 7, i32 7, i32 7> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP1]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[TMP7:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br i1 true, label [[TMP9:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: br label [[TMP6:%.*]] -; CHECK: 6: -; CHECK-NEXT: br i1 poison, label [[TMP7]], label [[TMP6]], !llvm.loop [[LOOP11:![0-9]+]] -; CHECK: 7: +; CHECK-NEXT: br label [[TMP8:%.*]] +; CHECK: 8: +; CHECK-NEXT: br i1 poison, label [[TMP9]], label [[TMP8]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK: 9: ; CHECK-NEXT: ret void ; br label %1 @@ -452,7 +452,7 @@ ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE16:%.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE9:%.*]] ] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = or <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3> @@ -460,63 +460,63 @@ ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i64 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i32, i32* [[DST:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i16, i16* [[SRC:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP3:%.*]] = load i16, i16* [[NEXT_GEP]], align 2 -; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP3]] to i32 -; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i32 [[TMP4]], 7 -; CHECK-NEXT: store i32 [[TMP5]], i32* [[NEXT_GEP7]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[SRC:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2 +; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP4]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i32 [[TMP5]], 7 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i64 [[INDEX]] +; CHECK-NEXT: store i32 [[TMP6]], i32* [[TMP7]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP1]], i64 1 -; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]] -; CHECK: pred.store.if11: -; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[INDEX]], 1 -;
CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i32, i32* [[DST]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[INDEX]], 1 -; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i16, i16* [[SRC]], i64 [[TMP8]] -; CHECK-NEXT: [[TMP9:%.*]] = load i16, i16* [[NEXT_GEP4]], align 2 -; CHECK-NEXT: [[TMP10:%.*]] = zext i16 [[TMP9]] to i32 -; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i32 [[TMP10]], 7 -; CHECK-NEXT: store i32 [[TMP11]], i32* [[NEXT_GEP8]], align 4 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]] -; CHECK: pred.store.continue12: -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP1]], i64 2 -; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14:%.*]] -; CHECK: pred.store.if13: -; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[INDEX]], 2 -; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i32, i32* [[DST]], i64 [[TMP13]] -; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[INDEX]], 2 -; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i16, i16* [[SRC]], i64 [[TMP14]] -; CHECK-NEXT: [[TMP15:%.*]] = load i16, i16* [[NEXT_GEP5]], align 2 -; CHECK-NEXT: [[TMP16:%.*]] = zext i16 [[TMP15]] to i32 -; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i32 [[TMP16]], 7 -; CHECK-NEXT: store i32 [[TMP17]], i32* [[NEXT_GEP9]], align 4 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE14]] -; CHECK: pred.store.continue14: -; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP1]], i64 3 -; CHECK-NEXT: br i1 [[TMP18]], label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16]] -; CHECK: pred.store.if15: -; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[INDEX]], 3 -; CHECK-NEXT: [[NEXT_GEP10:%.*]] = getelementptr i32, i32* [[DST]], i64 [[TMP19]] -; CHECK-NEXT: [[TMP20:%.*]] = or i64 [[INDEX]], 3 -; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i16, i16* [[SRC]], i64 [[TMP20]] -; CHECK-NEXT: [[TMP21:%.*]] = load i16, i16* [[NEXT_GEP6]], align 2 -; CHECK-NEXT: [[TMP22:%.*]] = zext i16 [[TMP21]] to i32 -; CHECK-NEXT: [[TMP23:%.*]] = shl nuw nsw i32 [[TMP22]], 7 -; CHECK-NEXT: store i32 [[TMP23]], i32* [[NEXT_GEP10]], align 4 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE16]] -; CHECK: pred.store.continue16: +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP1]], i64 1 +; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]] +; CHECK: pred.store.if4: +; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, i16* [[SRC]], i64 [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP10]], align 2 +; CHECK-NEXT: [[TMP12:%.*]] = zext i16 [[TMP11]] to i32 +; CHECK-NEXT: [[TMP13:%.*]] = shl nuw nsw i32 [[TMP12]], 7 +; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 [[TMP14]] +; CHECK-NEXT: store i32 [[TMP13]], i32* [[TMP15]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE5]] +; CHECK: pred.store.continue5: +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP1]], i64 2 +; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]] +; CHECK: pred.store.if6: +; CHECK-NEXT: [[TMP17:%.*]] = or i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i16, i16* [[SRC]], i64 [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = load i16, i16* [[TMP18]], align 2 +; CHECK-NEXT: [[TMP20:%.*]] = zext i16 [[TMP19]] to i32 +; CHECK-NEXT: [[TMP21:%.*]] = shl nuw nsw i32 [[TMP20]], 7 +; CHECK-NEXT: [[TMP22:%.*]] = or i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* 
[[DST]], i64 [[TMP22]] +; CHECK-NEXT: store i32 [[TMP21]], i32* [[TMP23]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE7]] +; CHECK: pred.store.continue7: +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP1]], i64 3 +; CHECK-NEXT: br i1 [[TMP24]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9]] +; CHECK: pred.store.if8: +; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i16, i16* [[SRC]], i64 [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = load i16, i16* [[TMP26]], align 2 +; CHECK-NEXT: [[TMP28:%.*]] = zext i16 [[TMP27]] to i32 +; CHECK-NEXT: [[TMP29:%.*]] = shl nuw nsw i32 [[TMP28]], 7 +; CHECK-NEXT: [[TMP30:%.*]] = or i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 [[TMP30]] +; CHECK-NEXT: store i32 [[TMP29]], i32* [[TMP31]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE9]] +; CHECK: pred.store.continue9: ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 -; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 +; CHECK-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[TMP26:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br i1 true, label [[TMP34:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: br label [[TMP25:%.*]] -; CHECK: 25: -; CHECK-NEXT: br i1 poison, label [[TMP26]], label [[TMP25]], !llvm.loop [[LOOP13:![0-9]+]] -; CHECK: 26: +; CHECK-NEXT: br label [[TMP33:%.*]] +; CHECK: 33: +; CHECK-NEXT: br i1 poison, label [[TMP34]], label [[TMP33]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK: 34: ; CHECK-NEXT: ret void ; br label %1 diff --git a/llvm/test/Transforms/LoopVectorize/consec_no_gep.ll b/llvm/test/Transforms/LoopVectorize/consec_no_gep.ll --- a/llvm/test/Transforms/LoopVectorize/consec_no_gep.ll +++ b/llvm/test/Transforms/LoopVectorize/consec_no_gep.ll @@ -11,7 +11,7 @@ ; CHECK-LABEL: @consecutive_no_gep( ; CHECK: vector.body ; CHECK: %[[index:.*]] = phi i64 [ 0, %vector.ph ] -; CHECK: getelementptr float, float* %{{.*}}, i64 %[[index]] +; CHECK: getelementptr inbounds float, float* %{{.*}}, i64 %[[index]] ; CHECK: load <4 x float> define void @consecutive_no_gep(float* noalias nocapture readonly %from, float* noalias nocapture %to, i32 %len) #0 { diff --git a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll --- a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll +++ b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll @@ -279,7 +279,7 @@ ; CHECK: vector.body ; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; CHECK-NOT: getelementptr -; CHECK: %next.gep = getelementptr i32, i32* %a, i64 %index +; CHECK: getelementptr inbounds i32, i32* %a, i64 %index ; CHECK-NOT: getelementptr ; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body ; @@ -311,16 +311,13 @@ ; INTER: vector.body ; INTER: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; INTER: %[[I0:.+]] = shl i64 %index, 2 -; INTER: %next.gep = getelementptr i32, i32* %a, i64 %[[I0]] -; INTER: %[[S1:.+]] = shl i64 %index, 2 -; INTER: %[[I1:.+]] = or i64 %[[S1]], 4 -; INTER: %next.gep2 = getelementptr i32, i32* %a, i64 %[[I1]] -; INTER: %[[S2:.+]] = shl 
i64 %index, 2 -; INTER: %[[I2:.+]] = or i64 %[[S2]], 8 -; INTER: %next.gep3 = getelementptr i32, i32* %a, i64 %[[I2]] -; INTER: %[[S3:.+]] = shl i64 %index, 2 -; INTER: %[[I3:.+]] = or i64 %[[S3]], 12 -; INTER: %next.gep4 = getelementptr i32, i32* %a, i64 %[[I3]] +; INTER: %[[I1:.+]] = or i64 %[[I0]], 4 +; INTER: %[[I2:.+]] = or i64 %[[I0]], 8 +; INTER: %[[I3:.+]] = or i64 %[[I0]], 12 +; INTER: = getelementptr inbounds i32, i32* %a, i64 %[[I0]] +; INTER: = getelementptr inbounds i32, i32* %a, i64 %[[I1]] +; INTER: = getelementptr inbounds i32, i32* %a, i64 %[[I2]] +; INTER: = getelementptr inbounds i32, i32* %a, i64 %[[I3]] ; INTER: br i1 {{.*}}, label %middle.block, label %vector.body ; define void @pointer_iv_non_uniform_0(i32* %a, i64 %n) { @@ -360,13 +357,13 @@ ; CHECK-NOT: LV: Found uniform instruction: %p = phi x86_fp80* [%tmp1, %for.body], [%a, %entry] ; CHECK: vector.body ; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] -; CHECK: %next.gep = getelementptr x86_fp80, x86_fp80* %a, i64 %index ; CHECK: %[[I1:.+]] = or i64 %index, 1 -; CHECK: %next.gep2 = getelementptr x86_fp80, x86_fp80* %a, i64 %[[I1]] ; CHECK: %[[I2:.+]] = or i64 %index, 2 -; CHECK: %next.gep3 = getelementptr x86_fp80, x86_fp80* %a, i64 %[[I2]] ; CHECK: %[[I3:.+]] = or i64 %index, 3 -; CHECK: %next.gep4 = getelementptr x86_fp80, x86_fp80* %a, i64 %[[I3]] +; CHECK: = getelementptr inbounds x86_fp80, x86_fp80* %a, i64 %index +; CHECK: = getelementptr inbounds x86_fp80, x86_fp80* %a, i64 %[[I1]] +; CHECK: = getelementptr inbounds x86_fp80, x86_fp80* %a, i64 %[[I2]] +; CHECK: = getelementptr inbounds x86_fp80, x86_fp80* %a, i64 %[[I3]] ; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body ; define void @pointer_iv_non_uniform_1(x86_fp80* %a, i64 %n) { @@ -400,8 +397,8 @@ ; CHECK: %pointer.phi = phi i32* [ %a, %vector.ph ], [ %ptr.ind, %vector.body ] ; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; CHECK: %[[PTRVEC:.+]] = getelementptr i32, i32* %pointer.phi, <4 x i64> <i64 0, i64 1, i64 2, i64 3> -; CHECK: %next.gep = getelementptr i32*, i32** %b, i64 %index -; CHECK: %[[NEXTGEPBC:.+]] = bitcast i32** %next.gep to <4 x i32*>* +; CHECK: [[NEXTGEP:%.+]] = getelementptr inbounds i32*, i32** %b, i64 %index +; CHECK: %[[NEXTGEPBC:.+]] = bitcast i32** [[NEXTGEP]] to <4 x i32*>* ; CHECK: store <4 x i32*> %[[PTRVEC]], <4 x i32*>* %[[NEXTGEPBC]], align 8 ; CHECK: %ptr.ind = getelementptr i32, i32* %pointer.phi, i64 4 ; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -1578,40 +1578,33 @@ ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP48:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[VEC_PHI9:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP49:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP49:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x double> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP39:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; UNROLL-NO-IC-NEXT: [[TMP1:%.*]]
= mul i64 [[TMP0]], 25 -; UNROLL-NO-IC-NEXT: [[NEXT_GEP:%.*]] = getelementptr double, double* [[B]], i64 [[TMP1]] -; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1 -; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 25 -; UNROLL-NO-IC-NEXT: [[NEXT_GEP2:%.*]] = getelementptr double, double* [[B]], i64 [[TMP3]] -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 2 -; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 25 -; UNROLL-NO-IC-NEXT: [[NEXT_GEP3:%.*]] = getelementptr double, double* [[B]], i64 [[TMP5]] -; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 3 -; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 25 -; UNROLL-NO-IC-NEXT: [[NEXT_GEP4:%.*]] = getelementptr double, double* [[B]], i64 [[TMP7]] -; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 25 -; UNROLL-NO-IC-NEXT: [[NEXT_GEP5:%.*]] = getelementptr double, double* [[B]], i64 [[TMP9]] -; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 5 -; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 25 -; UNROLL-NO-IC-NEXT: [[NEXT_GEP6:%.*]] = getelementptr double, double* [[B]], i64 [[TMP11]] -; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 6 -; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 25 -; UNROLL-NO-IC-NEXT: [[NEXT_GEP7:%.*]] = getelementptr double, double* [[B]], i64 [[TMP13]] -; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 7 -; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 25 -; UNROLL-NO-IC-NEXT: [[NEXT_GEP8:%.*]] = getelementptr double, double* [[B]], i64 [[TMP15]] -; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP]], i64 [[IDXPROM]] -; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP2]], i64 [[IDXPROM]] -; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP3]], i64 [[IDXPROM]] -; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP4]], i64 [[IDXPROM]] -; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP5]], i64 [[IDXPROM]] -; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP6]], i64 [[IDXPROM]] -; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP7]], i64 [[IDXPROM]] -; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP8]], i64 [[IDXPROM]] +; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 25 +; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 +; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 25 +; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 50 +; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 75 +; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 100 +; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 125 +; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 150 +; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 175 +; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP0]] +; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP1]] +; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP2]] +; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP3]] +; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP4]] +; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = 
getelementptr inbounds double, double* [[B]], i64 [[TMP5]] +; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP6]] +; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP7]] +; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = getelementptr inbounds double, double* [[TMP8]], i64 [[IDXPROM]] +; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = getelementptr inbounds double, double* [[TMP9]], i64 [[IDXPROM]] +; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = getelementptr inbounds double, double* [[TMP10]], i64 [[IDXPROM]] +; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, double* [[TMP11]], i64 [[IDXPROM]] +; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = getelementptr inbounds double, double* [[TMP12]], i64 [[IDXPROM]] +; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = getelementptr inbounds double, double* [[TMP13]], i64 [[IDXPROM]] +; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = getelementptr inbounds double, double* [[TMP14]], i64 [[IDXPROM]] +; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = getelementptr inbounds double, double* [[TMP15]], i64 [[IDXPROM]] ; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = load double, double* [[TMP16]], align 8 ; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = load double, double* [[TMP17]], align 8 ; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = load double, double* [[TMP18]], align 8 @@ -1637,7 +1630,7 @@ ; UNROLL-NO-IC-NEXT: [[TMP46:%.*]] = zext <4 x i1> [[TMP44]] to <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP47:%.*]] = zext <4 x i1> [[TMP45]] to <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP48]] = add <4 x i32> [[VEC_PHI]], [[TMP46]] -; UNROLL-NO-IC-NEXT: [[TMP49]] = add <4 x i32> [[VEC_PHI9]], [[TMP47]] +; UNROLL-NO-IC-NEXT: [[TMP49]] = add <4 x i32> [[VEC_PHI2]], [[TMP47]] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[TMP50:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10240 ; UNROLL-NO-IC-NEXT: br i1 [[TMP50]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] @@ -1682,36 +1675,31 @@ ; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-VF: vector.body: ; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[VEC_PHI3:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi double [ [[J:%.*]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 25 -; UNROLL-NO-VF-NEXT: [[NEXT_GEP:%.*]] = getelementptr double, double* [[B]], i64 [[TMP1]] -; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1 -; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 25 -; UNROLL-NO-VF-NEXT: [[NEXT_GEP2:%.*]] = getelementptr double, double* [[B]], i64 [[TMP3]] -; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP]], i64 [[IDXPROM]] -; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP2]], i64 [[IDXPROM]] -; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = load double, double* [[TMP4]], align 8 -; UNROLL-NO-VF-NEXT: [[TMP7]] = load double, double* [[TMP5]], align 8 -; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = fmul double [[VECTOR_RECUR]], [[TMP6]] -; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = fmul double [[TMP6]], [[TMP7]] -; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = fcmp une double [[TMP8]], 0.000000e+00 -; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = fcmp une double [[TMP9]], 
0.000000e+00 -; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = zext i1 [[TMP10]] to i32 -; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = zext i1 [[TMP11]] to i32 -; UNROLL-NO-VF-NEXT: [[TMP14]] = add i32 [[VEC_PHI]], [[TMP12]] -; UNROLL-NO-VF-NEXT: [[TMP15]] = add i32 [[VEC_PHI3]], [[TMP13]] +; UNROLL-NO-VF-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[VEC_PHI2:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi double [ [[J:%.*]], [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 25 +; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[OFFSET_IDX]] +; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, double* [[TMP0]], i64 [[IDXPROM]] +; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = load double, double* [[TMP1]], align 8 +; UNROLL-NO-VF-NEXT: [[TMP3]] = load double, double* [[TMP1]], align 8 +; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = fmul double [[VECTOR_RECUR]], [[TMP2]] +; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = fmul double [[TMP2]], [[TMP3]] +; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = fcmp une double [[TMP4]], 0.000000e+00 +; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = fcmp une double [[TMP5]], 0.000000e+00 +; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = zext i1 [[TMP6]] to i32 +; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = zext i1 [[TMP7]] to i32 +; UNROLL-NO-VF-NEXT: [[TMP10]] = add i32 [[VEC_PHI]], [[TMP8]] +; UNROLL-NO-VF-NEXT: [[TMP11]] = add i32 [[VEC_PHI2]], [[TMP9]] ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10240 -; UNROLL-NO-VF-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10240 +; UNROLL-NO-VF-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; UNROLL-NO-VF: middle.block: -; UNROLL-NO-VF-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP15]], [[TMP14]] +; UNROLL-NO-VF-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP11]], [[TMP10]] ; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 10240, 10240 ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[J]], [[ENTRY:%.*]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[J]], [[ENTRY:%.*]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi double* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ] ; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 10240, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; UNROLL-NO-VF-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ] @@ -1723,10 +1711,10 @@ ; UNROLL-NO-VF-NEXT: [[B_ADDR_012:%.*]] = phi double* [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[I_011:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[INC1:%.*]], [[FOR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[A_010:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[A_1]], [[FOR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi double [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP17:%.*]], [[FOR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi double [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP13:%.*]], [[FOR_BODY]] ] ; 
UNROLL-NO-VF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[B_ADDR_012]], i64 [[IDXPROM]] -; UNROLL-NO-VF-NEXT: [[TMP17]] = load double, double* [[ARRAYIDX]], align 8 -; UNROLL-NO-VF-NEXT: [[MUL:%.*]] = fmul double [[SCALAR_RECUR]], [[TMP17]] +; UNROLL-NO-VF-NEXT: [[TMP13]] = load double, double* [[ARRAYIDX]], align 8 +; UNROLL-NO-VF-NEXT: [[MUL:%.*]] = fmul double [[SCALAR_RECUR]], [[TMP13]] ; UNROLL-NO-VF-NEXT: [[TOBOOL:%.*]] = fcmp une double [[MUL]], 0.000000e+00 ; UNROLL-NO-VF-NEXT: [[INC:%.*]] = zext i1 [[TOBOOL]] to i32 ; UNROLL-NO-VF-NEXT: [[A_1]] = add nsw i32 [[A_010]], [[INC]] @@ -1747,22 +1735,19 @@ ; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x double> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] -; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; SINK-AFTER-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 25 -; SINK-AFTER-NEXT: [[NEXT_GEP:%.*]] = getelementptr double, double* [[B]], i64 [[TMP1]] -; SINK-AFTER-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1 -; SINK-AFTER-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 25 -; SINK-AFTER-NEXT: [[NEXT_GEP2:%.*]] = getelementptr double, double* [[B]], i64 [[TMP3]] -; SINK-AFTER-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 2 -; SINK-AFTER-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 25 -; SINK-AFTER-NEXT: [[NEXT_GEP3:%.*]] = getelementptr double, double* [[B]], i64 [[TMP5]] -; SINK-AFTER-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 3 -; SINK-AFTER-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 25 -; SINK-AFTER-NEXT: [[NEXT_GEP4:%.*]] = getelementptr double, double* [[B]], i64 [[TMP7]] -; SINK-AFTER-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP]], i64 [[IDXPROM]] -; SINK-AFTER-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP2]], i64 [[IDXPROM]] -; SINK-AFTER-NEXT: [[TMP10:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP3]], i64 [[IDXPROM]] -; SINK-AFTER-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP4]], i64 [[IDXPROM]] +; SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 25 +; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 +; SINK-AFTER-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 25 +; SINK-AFTER-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 50 +; SINK-AFTER-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 75 +; SINK-AFTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP0]] +; SINK-AFTER-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP1]] +; SINK-AFTER-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP2]] +; SINK-AFTER-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP3]] +; SINK-AFTER-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, double* [[TMP4]], i64 [[IDXPROM]] +; SINK-AFTER-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, double* [[TMP5]], i64 [[IDXPROM]] +; SINK-AFTER-NEXT: [[TMP10:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[IDXPROM]] +; SINK-AFTER-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, double* [[TMP7]], i64 [[IDXPROM]] ; SINK-AFTER-NEXT: [[TMP12:%.*]] = load double, double* [[TMP8]], align 8 ; SINK-AFTER-NEXT: [[TMP13:%.*]] = load double, double* [[TMP9]], align 8 ; SINK-AFTER-NEXT: [[TMP14:%.*]] = load double, double* [[TMP10]], align 8 diff --git 
a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll @@ -111,7 +111,7 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr [3072 x i32], [3072 x i32]* @A, i64 0, i64 [[TMP0]] +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr inbounds [3072 x i32], [3072 x i32]* @A, i64 0, i64 [[TMP0]] ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[NEXT_GEP]] to <12 x i32>* ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x i32>, <12 x i32>* [[TMP1]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> @@ -272,7 +272,7 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[NEXT_GEP]] to <4 x i32>* ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], @@ -755,7 +755,7 @@ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[NEXT_GEP]] to <12 x i32>* ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x i32>, <12 x i32>* [[TMP1]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> diff --git a/llvm/test/Transforms/LoopVectorize/opaque-ptr.ll b/llvm/test/Transforms/LoopVectorize/opaque-ptr.ll --- a/llvm/test/Transforms/LoopVectorize/opaque-ptr.ll +++ b/llvm/test/Transforms/LoopVectorize/opaque-ptr.ll @@ -3,7 +3,7 @@ ; TODO: This still crashes with inbounds on the GEPs. 
 define void @test(ptr %p1.start, ptr %p2.start, ptr %p1.end) {
-; CHECK-LABEL: @test(
+; CHECK-LABEL: define {{[^@]+}}@test(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br label [[LOOP:%.*]]
 ; CHECK: loop:
@@ -40,7 +40,7 @@
 }
 define void @store_pointer_induction(ptr %start, ptr %end) {
-; CHECK-LABEL: @store_pointer_induction(
+; CHECK-LABEL: define {{[^@]+}}@store_pointer_induction(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: [[START2:%.*]] = ptrtoint ptr [[START:%.*]] to i64
 ; CHECK-NEXT: [[END1:%.*]] = ptrtoint ptr [[END:%.*]] to i64
@@ -57,20 +57,16 @@
 ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
+; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP6]]
-; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 8
-; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP8]]
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0
-; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x ptr> [[TMP9]], ptr [[NEXT_GEP3]], i32 1
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr ptr, ptr [[NEXT_GEP]], i32 0
-; CHECK-NEXT: store <2 x ptr> [[TMP10]], ptr [[TMP11]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <2 x i64> <i64 0, i64 8>
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x ptr> [[TMP5]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr ptr, ptr [[TMP6]], i32 0
+; CHECK-NEXT: store <2 x ptr> [[TMP5]], ptr [[TMP7]], align 4
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 16
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
--- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
@@ -5,7 +5,7 @@
 ; Function Attrs: nofree norecurse nounwind
 define void @a(i8* readnone %b) {
-; CHECK-LABEL: @a(
+; CHECK-LABEL: define {{[^@]+}}@a(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: [[B1:%.*]] = ptrtoint i8* [[B:%.*]] to i64
 ; CHECK-NEXT: [[CMP_NOT4:%.*]] = icmp eq i8* [[B]], null
@@ -21,11 +21,11 @@
 ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, i8* null, i64 [[TMP1]]
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE10:%.*]] ]
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], -1
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* null, i64 [[TMP3]]
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP]], i64 -1
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE7:%.*]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 0, [[INDEX]]
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* null, i64 [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[TMP3]], i64 -1
 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 0
 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i32 -3
 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to <4 x i8>*
@@ -36,43 +36,41 @@
 ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP9]], i32 0
 ; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP]], i64 -1
-; CHECK-NEXT: store i8 95, i8* [[TMP11]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, i8* null, i64 [[TMP2]]
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, i8* [[TMP11]], i64 -1
+; CHECK-NEXT: store i8 95, i8* [[TMP12]], align 1
 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
 ; CHECK: pred.store.continue:
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP9]], i32 1
-; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
-; CHECK: pred.store.if5:
-; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], -1
-; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, i8* null, i64 [[TMP14]]
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP2]], i64 -1
-; CHECK-NEXT: store i8 95, i8* [[TMP15]], align 1
-; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
-; CHECK: pred.store.continue6:
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP9]], i32 2
-; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
-; CHECK: pred.store.if7:
-; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], -1
-; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, i8* null, i64 [[TMP18]]
-; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP3]], i64 -1
-; CHECK-NEXT: store i8 95, i8* [[TMP19]], align 1
-; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
-; CHECK: pred.store.continue8:
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP9]], i32 3
-; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10]]
-; CHECK: pred.store.if9:
-; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 3
-; CHECK-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], -1
-; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, i8* null, i64 [[TMP22]]
-; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP4]], i64 -1
-; CHECK-NEXT: store i8 95, i8* [[TMP23]], align 1
-; CHECK-NEXT: br label [[PRED_STORE_CONTINUE10]]
-; CHECK: pred.store.continue10:
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP9]], i32 1
+; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3:%.*]]
+; CHECK: pred.store.if2:
+; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], -1
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, i8* null, i64 [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, i8* [[TMP15]], i64 -1
+; CHECK-NEXT: store i8 95, i8* [[TMP16]], align 1
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE3]]
+; CHECK: pred.store.continue3:
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i1> [[TMP9]], i32 2
+; CHECK-NEXT: br i1 [[TMP17]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
+; CHECK: pred.store.if4:
+; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[OFFSET_IDX]], -2
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, i8* null, i64 [[TMP18]]
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, i8* [[TMP19]], i64 -1
+; CHECK-NEXT: store i8 95, i8* [[TMP20]], align 1
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE5]]
+; CHECK: pred.store.continue5:
+; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP9]], i32 3
+; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7]]
+; CHECK: pred.store.if6:
+; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[OFFSET_IDX]], -3
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, i8* null, i64 [[TMP22]]
+; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, i8* [[TMP23]], i64 -1
+; CHECK-NEXT: store i8 95, i8* [[TMP24]], align 1
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE7]]
+; CHECK: pred.store.continue7:
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -86,8 +84,8 @@
 ; CHECK: for.body:
 ; CHECK-NEXT: [[C_05:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[IF_END:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[C_05]], i64 -1
-; CHECK-NEXT: [[TMP25:%.*]] = load i8, i8* [[INCDEC_PTR]], align 1
-; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i8 [[TMP25]], 0
+; CHECK-NEXT: [[TMP26:%.*]] = load i8, i8* [[INCDEC_PTR]], align 1
+; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i8 [[TMP26]], 0
 ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END]], label [[IF_THEN:%.*]]
 ; CHECK: if.then:
 ; CHECK-NEXT: store i8 95, i8* [[INCDEC_PTR]], align 1
@@ -131,7 +129,7 @@
 ; 2. Non-uniform use by the getelementptr which is stored. This requires the
 ; vector value.
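 ;
 ; As a hand-written sketch of these two kinds of uses (illustrative IR only,
 ; not part of the function checked below; %val and %slot are made-up names,
 ; assuming an i8 pointer induction that steps by one element):
 ;   %ptr.iv = phi i8* [ %start, %entry ], [ %ptr.iv.next, %loop ]
 ;   %val = load i8, i8* %ptr.iv      ; use 1: only the address is needed, so
 ;                                    ; one scalar GEP per part suffices
 ;   store i8* %ptr.iv, i8** %slot    ; use 2: the pointer value itself is
 ;                                    ; stored, so all VF lanes are required
 ;   %ptr.iv.next = getelementptr inbounds i8, i8* %ptr.iv, i64 1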
 define void @pointer_induction_used_as_vector(i8** noalias %start.1, i8* noalias %start.2, i64 %N) {
-; CHECK-LABEL: @pointer_induction_used_as_vector(
+; CHECK-LABEL: define {{[^@]+}}@pointer_induction_used_as_vector(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -142,26 +140,33 @@
 ; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, i8* [[START_2:%.*]], i64 [[N_VEC]]
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
-; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i8* [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8*, i8** [[START_1]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[POINTER_PHI]], <4 x i64> <i64 0, i64 1, i64 2, i64 3>
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, <4 x i8*> [[TMP1]], i64 1
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8*, i8** [[NEXT_GEP]], i32 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to <4 x i8*>*
-; CHECK-NEXT: store <4 x i8*> [[TMP2]], <4 x i8*>* [[TMP4]], align 8
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i8*> [[TMP1]], i32 0
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[TMP5]], i32 0
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to <4 x i8>*
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP7]], align 1
-; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i8> [[WIDE_LOAD]],
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP6]] to <4 x i8>*
-; CHECK-NEXT: store <4 x i8> [[TMP8]], <4 x i8>* [[TMP9]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[START_2]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[START_2]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[START_2]], i64 [[TMP2]]
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[START_2]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i8*> poison, i8* [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i8*> [[TMP8]], i8* [[TMP5]], i32 1
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i8*> [[TMP9]], i8* [[TMP6]], i32 2
+; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i8*> [[TMP10]], i8* [[TMP7]], i32 3
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8*, i8** [[START_1]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, <4 x i8*> [[TMP11]], i64 1
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8*, i8** [[TMP12]], i32 0
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast i8** [[TMP14]] to <4 x i8*>*
+; CHECK-NEXT: store <4 x i8*> [[TMP13]], <4 x i8*>* [[TMP15]], align 8
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to <4 x i8>*
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP17]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = add <4 x i8> [[WIDE_LOAD]],
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP16]] to <4 x i8>*
+; CHECK-NEXT: store <4 x i8> [[TMP18]], <4 x i8>* [[TMP19]], align 1
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, i8* [[POINTER_PHI]], i64 4
-; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll
--- a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll
@@ -95,43 +95,39 @@
 ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr double, double* [[PTR1]], i64 16
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE12:%.*]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE9:%.*]] ]
 ; CHECK-NEXT: [[VEC_IV:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[VEC_IV4:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[VEC_IV5:%.*]] = add i64 [[INDEX]], 2
-; CHECK-NEXT: [[VEC_IV6:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT: [[VEC_IV1:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[VEC_IV2:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT: [[VEC_IV3:%.*]] = add i64 [[INDEX]], 3
 ; CHECK-NEXT: [[TMP0:%.*]] = icmp ule i64 [[VEC_IV]], 14
-; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i64 [[VEC_IV4]], 14
-; CHECK-NEXT: [[TMP2:%.*]] = icmp ule i64 [[VEC_IV5]], 14
-; CHECK-NEXT: [[TMP3:%.*]] = icmp ule i64 [[VEC_IV6]], 14
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i64 [[VEC_IV1]], 14
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ule i64 [[VEC_IV2]], 14
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ule i64 [[VEC_IV3]], 14
 ; CHECK-NEXT: br i1 [[TMP0]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr double, double* [[PTR1]], i64 [[TMP4]]
-; CHECK-NEXT: store double 0.000000e+00, double* [[NEXT_GEP]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, double* [[PTR1]], i64 [[INDEX]]
+; CHECK-NEXT: store double 0.000000e+00, double* [[TMP4]], align 8
 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
 ; CHECK: pred.store.continue:
-; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
-; CHECK: pred.store.if7:
-; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr double, double* [[PTR1]], i64 [[TMP5]]
-; CHECK-NEXT: store double 0.000000e+00, double* [[NEXT_GEP1]], align 8
-; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
-; CHECK: pred.store.continue8:
-; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]]
-; CHECK: pred.store.if9:
-; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 2
-; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr double, double* [[PTR1]], i64 [[TMP6]]
-; CHECK-NEXT: store double 0.000000e+00, double* [[NEXT_GEP2]], align 8
-; CHECK-NEXT: br label [[PRED_STORE_CONTINUE10]]
-; CHECK: pred.store.continue10:
-; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12]]
-; CHECK: pred.store.if11:
-; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 3
-; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr double, double* [[PTR1]], i64 [[TMP7]]
-; CHECK-NEXT: store double 0.000000e+00, double* [[NEXT_GEP3]], align 8
-; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]]
-; CHECK: pred.store.continue12:
+; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
+; CHECK: pred.store.if4:
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, double* [[PTR1]], i64 [[INDEX]]
+; CHECK-NEXT: store double 0.000000e+00, double* [[TMP5]], align 8
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE5]]
+; CHECK: pred.store.continue5:
+; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]]
+; CHECK: pred.store.if6:
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, double* [[PTR1]], i64 [[INDEX]]
+; CHECK-NEXT: store double 0.000000e+00, double* [[TMP6]], align 8
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE7]]
+; CHECK: pred.store.continue7:
+; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9]]
+; CHECK: pred.store.if8:
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, double* [[PTR1]], i64 [[INDEX]]
+; CHECK-NEXT: store double 0.000000e+00, double* [[TMP7]], align 8
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE9]]
+; CHECK: pred.store.continue9:
 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll b/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll
--- a/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll
+++ b/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll
@@ -26,13 +26,12 @@
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[PTR]], i32 [[TMP0]]
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP]], i32 1
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i32 [[INDEX]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 0
 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <4 x i8>*
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP3]], align 1
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[NEXT_GEP]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 0
 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <4 x i8>*
 ; CHECK-NEXT: store <4 x i8> [[WIDE_LOAD]], <4 x i8>* [[TMP5]], align 1
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
@@ -92,13 +91,12 @@
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[PTR]], i32 [[TMP0]]
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP]], i32 1
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i32 [[INDEX]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 0
 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <4 x i8>*
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP3]], align 1
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[NEXT_GEP]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 0
 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <4 x i8>*
 ; CHECK-NEXT: store <4 x i8> [[WIDE_LOAD]], <4 x i8>* [[TMP5]], align 1
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
--- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
@@ -1055,8 +1055,10 @@
 ; CHECK-NEXT: vector loop: {
 ; CHECK-NEXT: vector.body:
 ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
-; CHECK-NEXT: EMIT ir<%ptr.iv> = WIDEN-POINTER-INDUCTION ir<%start>, -1
-; CHECK-NEXT: CLONE vp<[[CLONED_GEP:%.+]]> = getelementptr ir<%ptr.iv>, ir<-1>
+; CHECK-NEXT: vp<[[TRANS_IV:%.+]]> = DERIVED-IV ir<0> + vp<[[CAN_IV]]> * ir<-1>
+; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[TRANS_IV]]>
+; CHECK-NEXT: CLONE vp<[[SCALAR_PTR_IV:%.+]]> = getelementptr ir<%start>, vp<[[STEPS]]>
+; CHECK-NEXT: CLONE vp<[[CLONED_GEP:%.+]]> = getelementptr vp<[[SCALAR_PTR_IV]]>, ir<-1>
 ; CHECK-NEXT: WIDEN ir<%l> = load vp<[[CLONED_GEP]]>
 ; CHECK-NEXT: WIDEN ir<%c.1> = icmp eq ir<%l>, ir<0>
 ; CHECK-NEXT: EMIT vp<[[NEG:%.+]]> = not ir<%c.1>
@@ -1068,7 +1070,8 @@
 ; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
 ; CHECK-EMPTY:
 ; CHECK-NEXT: pred.store.if:
-; CHECK-NEXT: REPLICATE ir<%ptr.iv.next> = getelementptr ir<%ptr.iv>, ir<-1>
+; CHECK-NEXT: REPLICATE vp<[[SCALAR_PTR_IV:%.+]]> = getelementptr ir<%start>, vp<[[STEPS]]>
+; CHECK-NEXT: REPLICATE ir<%ptr.iv.next> = getelementptr vp<[[SCALAR_PTR_IV]]>, ir<-1>
 ; CHECK-NEXT: REPLICATE store ir<95>, ir<%ptr.iv.next>
 ; CHECK-NEXT: Successor(s): pred.store.continue
 ; CHECK-EMPTY: