diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2642,7 +2642,7 @@
   TruncInst *Trunc = Def->getTruncInst();
   IRBuilderBase &Builder = State.Builder;
   assert(IV->getType() == ID.getStartValue()->getType() && "Types must match");
-  assert(!State.VF.isZero() && "VF must be non-zero");
+  assert(State.VF.isVector() && "must have vector VF");
 
   // The value from the original loop to which we are mapping the new induction
   // variable.
@@ -2695,37 +2695,11 @@
   // Now do the actual transformations, and start with creating the step value.
   Value *Step = CreateStepValue(ID.getStep());
-  if (State.VF.isScalar()) {
-    Value *ScalarIV = CreateScalarIV(Step);
-    Type *ScalarTy = IntegerType::get(ScalarIV->getContext(),
-                                      Step->getType()->getScalarSizeInBits());
-
-    Instruction::BinaryOps IncOp = ID.getInductionOpcode();
-    if (IncOp == Instruction::BinaryOpsEnd)
-      IncOp = Instruction::Add;
-    for (unsigned Part = 0; Part < UF; ++Part) {
-      Value *StartIdx = ConstantInt::get(ScalarTy, Part);
-      Instruction::BinaryOps MulOp = Instruction::Mul;
-      if (Step->getType()->isFloatingPointTy()) {
-        StartIdx = Builder.CreateUIToFP(StartIdx, Step->getType());
-        MulOp = Instruction::FMul;
-      }
-
-      Value *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
-      Value *EntryPart = Builder.CreateBinOp(IncOp, ScalarIV, Mul, "induction");
-      State.set(Def, EntryPart, Part);
-      if (Trunc) {
-        assert(!Step->getType()->isFloatingPointTy() &&
-               "fp inductions shouldn't be truncated");
-        addMetadata(EntryPart, Trunc);
-      }
-    }
-    return;
-  }
-
-  // Create a new independent vector induction variable, if one is needed.
-  if (Def->needsVectorIV())
-    createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, State);
+  // Create a new independent vector induction variable. Later VPlan-to-VPlan
+  // optimizations will remove it if it is not needed, e.g. because all its
+  // users access scalar values.
+  createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, State);
 
   if (Def->needsScalarIV()) {
     // Create scalar steps that can be used by instructions we will later
@@ -9328,6 +9302,7 @@
   // in ways that accessing values using original IR values is incorrect.
   Plan->disableValue2VPValue();
 
+  VPlanTransforms::optimizeInductions(*Plan, *PSE.getSE());
   VPlanTransforms::sinkScalarOperands(*Plan);
   VPlanTransforms::mergeReplicateRegions(*Plan);
   VPlanTransforms::removeDeadRecipes(*Plan, *OrigLoop);
@@ -9754,6 +9729,69 @@
   State.ILV->widenIntOrFpInduction(IV, this, State, CanonicalIV);
 }
 
+void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
+  assert(!State.Instance && "VPScalarIVStepsRecipe being replicated.");
+
+  // Fast-math-flags propagate from the original induction instruction.
+  IRBuilder<>::FastMathFlagGuard FMFG(State.Builder);
+  if (IndDesc.getInductionBinOp() &&
+      isa<FPMathOperator>(IndDesc.getInductionBinOp()))
+    State.Builder.setFastMathFlags(
+        IndDesc.getInductionBinOp()->getFastMathFlags());
+
+  Value *Step = State.get(getStepValue(), VPIteration(0, 0));
+  auto *Trunc = dyn_cast<TruncInst>(getUnderlyingValue());
+  auto CreateScalarIV = [&](Value *&Step) -> Value * {
+    Value *ScalarIV = State.get(getCanonicalIV(), VPIteration(0, 0));
+    auto *CanonicalIV = State.get(getParent()->getPlan()->getCanonicalIV(), 0);
+    if (!isCanonical() || CanonicalIV->getType() != IV->getType()) {
+      ScalarIV = IV->getType()->isIntegerTy()
+                     ? State.Builder.CreateSExtOrTrunc(ScalarIV, IV->getType())
+                     : State.Builder.CreateCast(Instruction::SIToFP, ScalarIV,
+                                                IV->getType());
+      ScalarIV = emitTransformedIndex(State.Builder, ScalarIV,
+                                      getStartValue()->getLiveInIRValue(), Step,
+                                      IndDesc);
+      ScalarIV->setName("offset.idx");
+    }
+    if (Trunc) {
+      auto *TruncType = cast<IntegerType>(Trunc->getType());
+      assert(Step->getType()->isIntegerTy() &&
+             "Truncation requires an integer step");
+      ScalarIV = State.Builder.CreateTrunc(ScalarIV, TruncType);
+      Step = State.Builder.CreateTrunc(Step, TruncType);
+    }
+    return ScalarIV;
+  };
+
+  Value *ScalarIV = CreateScalarIV(Step);
+  if (State.VF.isVector()) {
+    buildScalarSteps(ScalarIV, Step, IV, IndDesc, this, State);
+    return;
+  }
+
+  for (unsigned Part = 0; Part < State.UF; ++Part) {
+    assert(!State.VF.isScalable() && "scalable vectors not yet supported.");
+    Value *EntryPart;
+    if (Step->getType()->isFloatingPointTy()) {
+      Value *StartIdx =
+          getRuntimeVFAsFloat(State.Builder, Step->getType(), State.VF * Part);
+      // Floating-point operations inherit FMF via the builder's flags.
+      Value *MulOp = State.Builder.CreateFMul(StartIdx, Step);
+      EntryPart = State.Builder.CreateBinOp(IndDesc.getInductionOpcode(),
+                                            ScalarIV, MulOp);
+    } else {
+      Value *StartIdx =
+          getRuntimeVF(State.Builder, Step->getType(), State.VF * Part);
+      EntryPart = State.Builder.CreateAdd(
+          ScalarIV, State.Builder.CreateMul(StartIdx, Step), "induction");
+    }
+    State.set(this, EntryPart, Part);
+    if (Trunc)
+      State.ILV->addMetadata(EntryPart, Trunc);
+  }
+}
+
 void VPWidenPHIRecipe::execute(VPTransformState &State) {
   State.ILV->widenPHIInstruction(cast<PHINode>(getUnderlyingValue()), this,
                                  State);
@@ -10161,7 +10199,8 @@
   // Check if there is a scalar value for the selected lane.
   if (!hasScalarValue(Def, {Part, LastLane})) {
     // At the moment, VPWidenIntOrFpInductionRecipes can also be uniform.
-    assert(isa<VPWidenIntOrFpInductionRecipe>(Def->getDef()) &&
+    assert((isa<VPWidenIntOrFpInductionRecipe>(Def->getDef()) ||
+            isa<VPScalarIVStepsRecipe>(Def->getDef())) &&
            "unexpected recipe found to be invariant");
     IsUniform = true;
     LastLane = 0;
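With the VF = 1 special case above removed, unrolled scalar induction parts are produced by the per-part loop in VPScalarIVStepsRecipe::execute instead. As a sketch of the emitted IR for VF = 1, UF = 2 and an integer step of 1 (taken from the updated if-pred-stores.ll checks further down; the numeric suffixes on the value names depend on instruction renaming):

  %induction3 = add i64 %index, 0
  %induction4 = add i64 %index, 1

Each unrolled part then addresses memory with its own scalar step value.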
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -700,6 +700,9 @@
   /// Insert an unlinked recipe into a basic block immediately before
   /// the specified recipe.
   void insertBefore(VPRecipeBase *InsertPos);
+  /// Insert an unlinked recipe into \p BB immediately before the insertion
+  /// point \p IP.
+  void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);
 
   /// Insert an unlinked Recipe into a basic block immediately after
   /// the specified Recipe.
@@ -1111,6 +1114,8 @@
     return dyn_cast_or_null<TruncInst>(getVPValue(0)->getUnderlyingValue());
   }
 
+  PHINode *getPHINode() { return IV; }
+
   /// Returns the induction descriptor for the recipe.
   const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
 
@@ -1814,6 +1819,12 @@
   static inline bool classof(const VPDef *D) {
     return D->getVPDefID() == VPCanonicalIVPHISC;
   }
+  static inline bool classof(const VPHeaderPHIRecipe *D) {
+    return D->getVPDefID() == VPCanonicalIVPHISC;
+  }
+  static inline bool classof(const VPValue *V) {
+    return V->getVPValueID() == VPValue::VPVCanonicalIVPHISC;
+  }
 
   /// Generate the canonical scalar induction phi of the vector loop.
   void execute(VPTransformState &State) override;
@@ -1879,6 +1890,53 @@
   }
 };
 
+/// A recipe for handling phi nodes of integer and floating-point inductions,
+/// producing their scalar values.
+class VPScalarIVStepsRecipe : public VPRecipeBase, public VPValue {
+  PHINode *IV;
+  const InductionDescriptor &IndDesc;
+
+public:
+  VPScalarIVStepsRecipe(PHINode *IV, const InductionDescriptor &IndDesc,
+                        VPValue *CanonicalIV, VPValue *Start, VPValue *Step,
+                        Instruction *Trunc)
+      : VPRecipeBase(VPScalarIVStepsSC, {CanonicalIV, Start, Step}),
+        VPValue(Trunc ? Trunc : IV, this), IV(IV), IndDesc(IndDesc) {}
+
+  ~VPScalarIVStepsRecipe() override = default;
+
+  /// Method to support type inquiry through isa, cast, and dyn_cast.
+  static inline bool classof(const VPDef *D) {
+    return D->getVPDefID() == VPRecipeBase::VPScalarIVStepsSC;
+  }
+  /// Extra classof implementations to allow directly casting from VPUser ->
+  /// VPScalarIVStepsRecipe.
+  static inline bool classof(const VPUser *U) {
+    auto *R = dyn_cast<VPRecipeBase>(U);
+    return R && R->getVPDefID() == VPRecipeBase::VPScalarIVStepsSC;
+  }
+  static inline bool classof(const VPRecipeBase *R) {
+    return R->getVPDefID() == VPRecipeBase::VPScalarIVStepsSC;
+  }
+
+  /// Generate the scalarized versions of the phi node as needed by its users.
+  void execute(VPTransformState &State) override;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  /// Print the recipe.
+  void print(raw_ostream &O, const Twine &Indent,
+             VPSlotTracker &SlotTracker) const override;
+#endif
+
+  /// Returns true if the induction is canonical, i.e. starting at 0 and
+  /// incremented by UF * VF (= the original IV is incremented by 1).
+  bool isCanonical() const;
+
+  VPCanonicalIVPHIRecipe *getCanonicalIV() const;
+  VPValue *getStartValue() const { return getOperand(1); }
+  VPValue *getStepValue() const { return getOperand(2); }
+};
+
 /// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
 /// holds a sequence of zero or more VPRecipe's each representing a sequence of
 /// output IR instructions. All PHI-like recipes must come before any non-PHI
 /// recipes.
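For reference, the operands of the new recipe are laid out as canonical IV, start value and step, matching the getCanonicalIV, getStartValue and getStepValue accessors above; the recipe's defined VPValue is tied to the truncated IV when a truncate is present, and to the original phi otherwise. In VPlan dumps it prints as, for example (from the updated vplan-printing.ll checks at the end of this patch; the vp<%2> number is whatever VPSlotTracker assigns to the canonical IV):

  ir<%iv> = SCALAR-STEPS vp<%2>, ir<0>, ir<1>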
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -583,7 +583,8 @@
   case VPWidenSC:
   case VPWidenGEPSC:
   case VPReductionSC:
-  case VPWidenSelectSC: {
+  case VPWidenSelectSC:
+  case VPScalarIVStepsSC: {
     const Instruction *I =
         dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
     (void)I;
@@ -608,6 +609,14 @@
   Parent->getRecipeList().insert(InsertPos->getIterator(), this);
 }
 
+void VPRecipeBase::insertBefore(VPBasicBlock &BB,
+                                iplist<VPRecipeBase>::iterator I) {
+  assert(!Parent && "Recipe already in some VPBasicBlock");
+  assert(I == BB.end() || I->getParent() == &BB);
+  Parent = &BB;
+  BB.getRecipeList().insert(I, this);
+}
+
 void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
   assert(!Parent && "Recipe already in some VPBasicBlock");
   assert(InsertPos->getParent() &&
@@ -634,10 +643,8 @@
 
 void VPRecipeBase::moveBefore(VPBasicBlock &BB,
                               iplist<VPRecipeBase>::iterator I) {
-  assert(I == BB.end() || I->getParent() == &BB);
   removeFromParent();
-  Parent = &BB;
-  BB.getRecipeList().insert(I, this);
+  insertBefore(BB, I);
 }
 
 void VPInstruction::generateInstruction(VPTransformState &State,
@@ -875,13 +882,16 @@
   auto *IV = getCanonicalIV();
   assert(all_of(IV->users(),
                 [](const VPUser *U) {
+                  if (isa<VPScalarIVStepsRecipe>(U))
+                    return true;
                   auto *VPI = cast<VPInstruction>(U);
                   return VPI->getOpcode() ==
                              VPInstruction::CanonicalIVIncrement ||
                          VPI->getOpcode() ==
                              VPInstruction::CanonicalIVIncrementNUW;
                 }) &&
-         "the canonical IV should only be used by its increments when "
+         "the canonical IV should only be used by its increments or "
+         "ScalarIVSteps when "
          "resetting the start value");
   IV->setOperand(0, VPV);
 }
@@ -1272,7 +1282,32 @@
   return StartC && StartC->isZero() && StepC && StepC->isOne();
 }
 
+VPCanonicalIVPHIRecipe *VPScalarIVStepsRecipe::getCanonicalIV() const {
+  return cast<VPCanonicalIVPHIRecipe>(getOperand(0));
+}
+
+bool VPScalarIVStepsRecipe::isCanonical() const {
+  auto *CanIV = getCanonicalIV();
+  // The start value of the steps-recipe must match the start value of the
+  // canonical induction and it must step by 1.
+  if (CanIV->getStartValue() != getStartValue())
+    return false;
+  auto *StepVPV = getStepValue();
+  if (StepVPV->getDef())
+    return false;
+  auto *StepC = dyn_cast_or_null<ConstantInt>(StepVPV->getLiveInIRValue());
+  return StepC && StepC->isOne();
+}
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent,
+                                  VPSlotTracker &SlotTracker) const {
+  O << Indent;
+  printAsOperand(O, SlotTracker);
+  O << " = SCALAR-STEPS ";
+  printOperands(O, SlotTracker);
+}
+
 void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
   O << Indent << "WIDEN-GEP ";
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -54,6 +54,10 @@
   /// Try to remove dead recipes. At the moment, only dead header recipes are
   /// removed.
   static void removeDeadRecipes(VPlan &Plan, Loop &OrigLoop);
+
+  /// If all users of a vector IV need scalar values, provide them by building
+  /// scalar steps off of the canonical scalar IV, and remove the vector IV.
+  static void optimizeInductions(VPlan &Plan, ScalarEvolution &SE);
 };
 
 } // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -378,3 +378,39 @@
     R.eraseFromParent();
   }
 }
+
+void VPlanTransforms::optimizeInductions(VPlan &Plan, ScalarEvolution &SE) {
+  SmallVector<VPRecipeBase *> ToRemove;
+  VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
+  for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
+    auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
+    if (!IV || IV->needsVectorIV())
+      continue;
+
+    const InductionDescriptor &ID = IV->getInductionDescriptor();
+    const SCEV *StepSCEV = ID.getStep();
+    VPValue *Step = nullptr;
+    if (auto *E = dyn_cast<SCEVConstant>(StepSCEV)) {
+      Step = new VPValue(E->getValue());
+      Plan.addExternalDef(Step);
+    } else if (auto *E = dyn_cast<SCEVUnknown>(StepSCEV)) {
+      Step = new VPValue(E->getValue());
+      Plan.addExternalDef(Step);
+    } else {
+      Step = new VPExpandSCEVRecipe(StepSCEV, SE);
+    }
+
+    VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe(
+        IV->getPHINode(), ID, Plan.getCanonicalIV(), IV->getStartValue(), Step,
+        IV->getTruncInst());
+
+    HeaderVPBB->insert(Steps, HeaderVPBB->getFirstNonPhi());
+    if (Step->getDef()) {
+      // TODO: Place the step in the preheader, once it is explicitly modeled
+      // in VPlan.
+      HeaderVPBB->insert(cast<VPRecipeBase>(Step->getDef()),
+                         HeaderVPBB->getFirstNonPhi());
+    }
+    IV->replaceAllUsesWith(Steps);
+    ToRemove.push_back(IV);
+  }
+  for (VPRecipeBase *R : ToRemove)
+    R->eraseFromParent();
+}
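The net effect of optimizeInductions shows up in the VPlan printing tests updated below: for an induction whose users all need scalar values, the widened phi recipe

  WIDEN-INDUCTION %iv = phi %iv.next, 0

is replaced by scalar steps built on the canonical induction,

  ir<%iv> = SCALAR-STEPS vp<%2>, ir<0>, ir<1>

with vp<%2> again standing for the canonical IV.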
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -332,6 +332,7 @@
     VPInterleaveSC,
     VPReductionSC,
     VPReplicateSC,
+    VPScalarIVStepsSC,
     VPWidenCallSC,
     VPWidenCanonicalIVSC,
     VPWidenGEPSC,
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-scalar-epilogue-fallback.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-scalar-epilogue-fallback.ll
--- a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-scalar-epilogue-fallback.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-scalar-epilogue-fallback.ll
@@ -34,7 +34,7 @@
 ; CHECK-NEXT:    store <16 x i8> [[WIDE_LOAD]], <16 x i8>* [[TMP6]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
+; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[SIZE]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]]
@@ -50,7 +50,7 @@
 ; CHECK-NEXT:    [[TMP8:%.*]] = load i8, i8* [[INCDEC_PTR]], align 1
 ; CHECK-NEXT:    store i8 [[TMP8]], i8* [[BUFF]], align 1
 ; CHECK-NEXT:    [[TOBOOL11:%.*]] = icmp eq i32 [[DEC]], 0
-; CHECK-NEXT:    br i1 [[TOBOOL11]], label [[END]], label [[BODY]], !llvm.loop !2
+; CHECK-NEXT:    br i1 [[TOBOOL11]], label [[END]], label [[BODY]], !llvm.loop [[LOOP2:![0-9]+]]
 ; CHECK:       end:
 ; CHECK-NEXT:    [[INCDEC_PTR_LCSSA:%.*]] = phi i8* [ [[INCDEC_PTR]], [[BODY]] ], [ [[IND_END2]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    store i8* [[INCDEC_PTR_LCSSA]], i8** [[POS]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll
@@ -555,14 +555,6 @@
 ; VF-TWO-CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; VF-TWO-CHECK:       vector.body:
 ; VF-TWO-CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; VF-TWO-CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 0
-; VF-TWO-CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 4
-; VF-TWO-CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 8
-; VF-TWO-CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[INDEX]], 12
-; VF-TWO-CHECK-NEXT:    [[TMP16:%.*]] = add i64 [[INDEX]], 16
-; VF-TWO-CHECK-NEXT:    [[TMP17:%.*]] = add i64 [[INDEX]], 20
-; VF-TWO-CHECK-NEXT:    [[TMP18:%.*]] = add i64 [[INDEX]], 24
-; VF-TWO-CHECK-NEXT:    [[TMP19:%.*]] = add i64 [[INDEX]], 28
 ; VF-TWO-CHECK-NEXT:    [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32
 ; VF-TWO-CHECK-NEXT:    [[TMP20:%.*]] = add i32 [[OFFSET_IDX]], 0
 ; VF-TWO-CHECK-NEXT:    [[TMP21:%.*]] = add i32 [[OFFSET_IDX]], 4
@@ -572,6 +564,14 @@
 ; VF-TWO-CHECK-NEXT:    [[TMP25:%.*]] = add i32 [[OFFSET_IDX]], 20
 ; VF-TWO-CHECK-NEXT:    [[TMP26:%.*]] = add i32 [[OFFSET_IDX]], 24
 ; VF-TWO-CHECK-NEXT:    [[TMP27:%.*]] = add i32 [[OFFSET_IDX]], 28
+; VF-TWO-CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 0
+; VF-TWO-CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 4
+; VF-TWO-CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 8
+; VF-TWO-CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[INDEX]], 12
+; VF-TWO-CHECK-NEXT:    [[TMP16:%.*]] = add i64 [[INDEX]], 16
+; VF-TWO-CHECK-NEXT:    [[TMP17:%.*]] = add i64 [[INDEX]], 20
+; VF-TWO-CHECK-NEXT:    [[TMP18:%.*]] = add i64 [[INDEX]], 24
+; VF-TWO-CHECK-NEXT:    [[TMP19:%.*]] = add i64 [[INDEX]], 28
 ; VF-TWO-CHECK-NEXT:    [[TMP28:%.*]] = xor i32 [[TMP20]], -1
 ; VF-TWO-CHECK-NEXT:    [[TMP29:%.*]] = xor i32 [[TMP21]], -1
 ; VF-TWO-CHECK-NEXT:    [[TMP30:%.*]] = xor i32 [[TMP22]], -1
@@ -703,9 +703,9 @@
 ; VF-TWO-CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
 ; VF-TWO-CHECK:       vec.epilog.vector.body:
 ; VF-TWO-CHECK-NEXT:    [[INDEX18:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT19:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; VF-TWO-CHECK-NEXT:    [[TMP117:%.*]] = add i64 [[INDEX18]], 0
 ; VF-TWO-CHECK-NEXT:    [[OFFSET_IDX23:%.*]] = trunc i64 [[INDEX18]] to i32
 ; VF-TWO-CHECK-NEXT:    [[TMP118:%.*]] = add i32 [[OFFSET_IDX23]], 0
+; VF-TWO-CHECK-NEXT:    [[TMP117:%.*]] = add i64 [[INDEX18]], 0
 ; VF-TWO-CHECK-NEXT:    [[TMP119:%.*]] = xor i32 [[TMP118]], -1
 ; VF-TWO-CHECK-NEXT:    [[TMP120:%.*]] = add i32 [[TMP119]], [[N]]
 ; VF-TWO-CHECK-NEXT:    [[TMP121:%.*]] = sext i32 [[TMP120]] to i64
@@ -782,14 +782,6 @@
 ; VF-FOUR-CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; VF-FOUR-CHECK:       vector.body:
 ; VF-FOUR-CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; VF-FOUR-CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 0
-; VF-FOUR-CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 4
-; VF-FOUR-CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 8
-; VF-FOUR-CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[INDEX]], 12
-; VF-FOUR-CHECK-NEXT:    [[TMP16:%.*]] = add i64 [[INDEX]], 16
-; VF-FOUR-CHECK-NEXT:    [[TMP17:%.*]] = add i64 [[INDEX]], 20
-; VF-FOUR-CHECK-NEXT:    [[TMP18:%.*]] = add i64 [[INDEX]], 24
-; VF-FOUR-CHECK-NEXT:    [[TMP19:%.*]] = add i64 [[INDEX]], 28
 ; VF-FOUR-CHECK-NEXT:    [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32
 ; VF-FOUR-CHECK-NEXT:    [[TMP20:%.*]] = add i32 [[OFFSET_IDX]], 0
 ; VF-FOUR-CHECK-NEXT:    [[TMP21:%.*]] = add i32 [[OFFSET_IDX]], 4
@@ -799,6 +791,14 @@
 ; VF-FOUR-CHECK-NEXT:    [[TMP25:%.*]] = add i32 [[OFFSET_IDX]], 20
 ; VF-FOUR-CHECK-NEXT:    [[TMP26:%.*]] = add i32 [[OFFSET_IDX]], 24
 ; VF-FOUR-CHECK-NEXT:    [[TMP27:%.*]] = add i32 [[OFFSET_IDX]], 28
+; VF-FOUR-CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 0
+; VF-FOUR-CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 4
+; VF-FOUR-CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 8
+; VF-FOUR-CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[INDEX]], 12
+; VF-FOUR-CHECK-NEXT:    [[TMP16:%.*]] = add i64 [[INDEX]], 16
+; VF-FOUR-CHECK-NEXT:    [[TMP17:%.*]] = add i64 [[INDEX]], 20
+; VF-FOUR-CHECK-NEXT:    [[TMP18:%.*]] = add i64 [[INDEX]], 24
+; VF-FOUR-CHECK-NEXT:    [[TMP19:%.*]] = add i64 [[INDEX]], 28
 ; VF-FOUR-CHECK-NEXT:    [[TMP28:%.*]] = xor i32 [[TMP20]], -1
 ; VF-FOUR-CHECK-NEXT:    [[TMP29:%.*]] = xor i32 [[TMP21]], -1
 ; VF-FOUR-CHECK-NEXT:    [[TMP30:%.*]] = xor i32 [[TMP22]], -1
@@ -930,9 +930,9 @@
 ; VF-FOUR-CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
 ; VF-FOUR-CHECK:       vec.epilog.vector.body:
 ; VF-FOUR-CHECK-NEXT:    [[INDEX18:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT19:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; VF-FOUR-CHECK-NEXT:    [[TMP117:%.*]] = add i64 [[INDEX18]], 0
 ; VF-FOUR-CHECK-NEXT:    [[OFFSET_IDX23:%.*]] = trunc i64 [[INDEX18]] to i32
 ; VF-FOUR-CHECK-NEXT:    [[TMP118:%.*]] = add i32 [[OFFSET_IDX23]], 0
+; VF-FOUR-CHECK-NEXT:    [[TMP117:%.*]] = add i64 [[INDEX18]], 0
 ; VF-FOUR-CHECK-NEXT:    [[TMP119:%.*]] = xor i32 [[TMP118]], -1
 ; VF-FOUR-CHECK-NEXT:    [[TMP120:%.*]] = add i32 [[TMP119]], [[N]]
 ; VF-FOUR-CHECK-NEXT:    [[TMP121:%.*]] = sext i32 [[TMP120]] to i64
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll b/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll
--- a/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll
@@ -8,13 +8,13 @@
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY:%.*]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 2, i64 3, i64 4, i64 5>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND_TRUNC:%.+]] = phi <4 x i32> [ <i32 2, i32 3, i32 4, i32 5>, %vector.ph ], [ [[VEC_IND_TRUNC_NEXT:%.+]], %vector.body ]
-; CHECK-NEXT:    = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 2, [[INDEX]]
 ; CHECK-NEXT:    [[TRUNC:%.+]] = trunc i64 [[OFFSET_IDX]] to i32
 ; CHECK-NEXT:    [[TMP7:%.*]] = add i32 [[TRUNC]], 0
 ; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[TRUNC]], 1
 ; CHECK-NEXT:    [[TMP9:%.*]] = add i32 [[TRUNC]], 2
 ; CHECK-NEXT:    [[TMP10:%.*]] = add i32 [[TRUNC]], 3
+; CHECK-NEXT:    = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    store i32 [[TMP7]], i32* %ptr.2, align 4
 ; CHECK-NEXT:    store i32 [[TMP8]], i32* %ptr.2, align 4
 ; CHECK-NEXT:    store i32 [[TMP9]], i32* %ptr.2, align 4
diff --git a/llvm/test/Transforms/LoopVectorize/float-induction.ll b/llvm/test/Transforms/LoopVectorize/float-induction.ll
--- a/llvm/test/Transforms/LoopVectorize/float-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/float-induction.ll
@@ -157,11 +157,11 @@
 ; VEC1_INTERL2-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; VEC1_INTERL2:       vector.body:
 ; VEC1_INTERL2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; VEC1_INTERL2-NEXT:    [[INDUCTION2:%.*]] = or i64 [[INDEX]], 1
 ; VEC1_INTERL2-NEXT:    [[TMP4:%.*]] = sitofp i64 [[INDEX]] to float
 ; VEC1_INTERL2-NEXT:    [[TMP5:%.*]] = fmul fast float [[FPINC]], [[TMP4]]
 ; VEC1_INTERL2-NEXT:    [[OFFSET_IDX:%.*]] = fsub fast float [[INIT]], [[TMP5]]
 ; VEC1_INTERL2-NEXT:    [[TMP6:%.*]] = fsub fast float [[OFFSET_IDX]], [[FPINC]]
+; VEC1_INTERL2-NEXT:    [[INDUCTION2:%.*]] = or i64 [[INDEX]], 1
 ; VEC1_INTERL2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
 ; VEC1_INTERL2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDUCTION2]]
 ; VEC1_INTERL2-NEXT:    store float [[OFFSET_IDX]], float* [[TMP7]], align 4
@@ -416,13 +416,13 @@
 ; VEC1_INTERL2-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; VEC1_INTERL2:       vector.body:
 ; VEC1_INTERL2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; VEC1_INTERL2-NEXT:    [[INDUCTION2:%.*]] = or i64 [[INDEX]], 1
 ; VEC1_INTERL2-NEXT:    [[TMP4:%.*]] = sitofp i64 [[INDEX]] to float
 ; VEC1_INTERL2-NEXT:    [[TMP5:%.*]] = fmul reassoc float [[FPINC]], [[TMP4]]
 ; VEC1_INTERL2-NEXT:    [[OFFSET_IDX:%.*]] = fsub reassoc float [[INIT]], [[TMP5]]
 ; VEC1_INTERL2-NEXT:    [[TMP6:%.*]] = fmul reassoc float [[FPINC]], 0.000000e+00
 ; VEC1_INTERL2-NEXT:    [[TMP7:%.*]] = fsub reassoc float [[OFFSET_IDX]], [[TMP6]]
 ; VEC1_INTERL2-NEXT:    [[TMP8:%.*]] = fsub reassoc float [[OFFSET_IDX]], [[FPINC]]
+; VEC1_INTERL2-NEXT:    [[INDUCTION2:%.*]] = or i64 [[INDEX]], 1
 ; VEC1_INTERL2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
 ; VEC1_INTERL2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDUCTION2]]
 ; VEC1_INTERL2-NEXT:    store float [[TMP7]], float* [[TMP9]], align 4
@@ -664,11 +664,11 @@
 ; VEC1_INTERL2-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; VEC1_INTERL2:       vector.body:
 ; VEC1_INTERL2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; VEC1_INTERL2-NEXT:    [[INDUCTION2:%.*]] = or i64 [[INDEX]], 1
 ; VEC1_INTERL2-NEXT:    [[TMP4:%.*]] = sitofp i64 [[INDEX]] to float
 ; VEC1_INTERL2-NEXT:    [[TMP5:%.*]] = fmul fast float [[TMP4]], 5.000000e-01
 ; VEC1_INTERL2-NEXT:    [[OFFSET_IDX:%.*]] = fadd fast float [[TMP5]], [[INIT]]
 ; VEC1_INTERL2-NEXT:    [[TMP6:%.*]] = fadd fast float [[OFFSET_IDX]], 5.000000e-01
+; VEC1_INTERL2-NEXT:    [[INDUCTION2:%.*]] = or i64 [[INDEX]], 1
 ; VEC1_INTERL2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
 ; VEC1_INTERL2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDUCTION2]]
 ; VEC1_INTERL2-NEXT:    store float [[OFFSET_IDX]], float* [[TMP7]], align 4
@@ -984,29 +984,29 @@
 ; VEC1_INTERL2-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; VEC1_INTERL2:       vector.body:
 ; VEC1_INTERL2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; VEC1_INTERL2-NEXT:    [[INDUCTION5:%.*]] = or i64 [[INDEX]], 1
 ; VEC1_INTERL2-NEXT:    [[TMP6:%.*]] = sitofp i64 [[INDEX]] to float
-; VEC1_INTERL2-NEXT:    [[TMP7:%.*]] = fmul fast float [[TMP6]], -5.000000e-01
-; VEC1_INTERL2-NEXT:    [[TMP8:%.*]] = sitofp i64 [[INDEX]] to float
-; VEC1_INTERL2-NEXT:    [[TMP9:%.*]] = fmul fast float [[TMP0]], [[TMP8]]
-; VEC1_INTERL2-NEXT:    [[OFFSET_IDX8:%.*]] = fadd fast float [[TMP9]], [[INIT]]
-; VEC1_INTERL2-NEXT:    [[TMP10:%.*]] = fadd fast float [[OFFSET_IDX8]], [[TMP0]]
+; VEC1_INTERL2-NEXT:    [[TMP7:%.*]] = fmul fast float [[TMP0]], [[TMP6]]
+; VEC1_INTERL2-NEXT:    [[OFFSET_IDX:%.*]] = fadd fast float [[TMP7]], [[INIT]]
+; VEC1_INTERL2-NEXT:    [[TMP8:%.*]] = fadd fast float [[OFFSET_IDX]], [[TMP0]]
+; VEC1_INTERL2-NEXT:    [[TMP9:%.*]] = sitofp i64 [[INDEX]] to float
+; VEC1_INTERL2-NEXT:    [[TMP10:%.*]] = fmul fast float [[TMP9]], -5.000000e-01
+; VEC1_INTERL2-NEXT:    [[INDUCTION6:%.*]] = or i64 [[INDEX]], 1
 ; VEC1_INTERL2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
-; VEC1_INTERL2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDUCTION5]]
-; VEC1_INTERL2-NEXT:    store float [[OFFSET_IDX8]], float* [[TMP11]], align 4
-; VEC1_INTERL2-NEXT:    store float [[TMP10]], float* [[TMP12]], align 4
-; VEC1_INTERL2-NEXT:    [[TMP13:%.*]] = fadd fast float [[OFFSET_IDX8]], [[TMP0]]
-; VEC1_INTERL2-NEXT:    [[TMP14:%.*]] = fadd fast float [[TMP10]], [[TMP0]]
-; VEC1_INTERL2-NEXT:    [[TMP15:%.*]] = fadd fast float [[TMP7]], 0xBFD99999A0000000
-; VEC1_INTERL2-NEXT:    [[TMP16:%.*]] = fadd fast float [[TMP7]], 0xBFECCCCCC0000000
+; VEC1_INTERL2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDUCTION6]]
+; VEC1_INTERL2-NEXT:    store float [[OFFSET_IDX]], float* [[TMP11]], align 4
+; VEC1_INTERL2-NEXT:    store float [[TMP8]], float* [[TMP12]], align 4
+; VEC1_INTERL2-NEXT:    [[TMP13:%.*]] = fadd fast float [[OFFSET_IDX]], [[TMP0]]
+; VEC1_INTERL2-NEXT:    [[TMP14:%.*]] = fadd fast float [[TMP8]], [[TMP0]]
+; VEC1_INTERL2-NEXT:    [[TMP15:%.*]] = fadd fast float [[TMP10]], 0xBFD99999A0000000
+; VEC1_INTERL2-NEXT:    [[TMP16:%.*]] = fadd fast float [[TMP10]], 0xBFECCCCCC0000000
 ; VEC1_INTERL2-NEXT:    [[TMP17:%.*]] = fadd fast float [[TMP15]], [[TMP13]]
 ; VEC1_INTERL2-NEXT:    [[TMP18:%.*]] = fadd fast float [[TMP16]], [[TMP14]]
 ; VEC1_INTERL2-NEXT:    [[TMP19:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]]
-; VEC1_INTERL2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDUCTION5]]
+; VEC1_INTERL2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDUCTION6]]
 ; VEC1_INTERL2-NEXT:    store float [[TMP17]], float* [[TMP19]], align 4
 ; VEC1_INTERL2-NEXT:    store float [[TMP18]], float* [[TMP20]], align 4
 ; VEC1_INTERL2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds float, float* [[C:%.*]], i64 [[INDEX]]
-; VEC1_INTERL2-NEXT:    [[TMP22:%.*]] = getelementptr inbounds float, float* [[C]], i64 [[INDUCTION5]]
+; VEC1_INTERL2-NEXT:    [[TMP22:%.*]] = getelementptr inbounds float, float* [[C]], i64 [[INDUCTION6]]
 ; VEC1_INTERL2-NEXT:    store float [[TMP15]], float* [[TMP21]], align 4
 ; VEC1_INTERL2-NEXT:    store float [[TMP16]], float* [[TMP22]], align 4
 ; VEC1_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
@@ -1279,11 +1279,11 @@
 ; VEC1_INTERL2-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; VEC1_INTERL2:       vector.body:
 ; VEC1_INTERL2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; VEC1_INTERL2-NEXT:    [[INDUCTION2:%.*]] = or i64 [[INDEX]], 1
 ; VEC1_INTERL2-NEXT:    [[TMP4:%.*]] = sitofp i64 [[INDEX]] to float
 ; VEC1_INTERL2-NEXT:    [[TMP5:%.*]] = fmul fast float [[TMP4]], 5.000000e-01
 ; VEC1_INTERL2-NEXT:    [[OFFSET_IDX:%.*]] = fadd fast float [[TMP5]], 1.000000e+00
 ; VEC1_INTERL2-NEXT:    [[TMP6:%.*]] = fadd fast float [[TMP5]], 1.500000e+00
+; VEC1_INTERL2-NEXT:    [[INDUCTION2:%.*]] = or i64 [[INDEX]], 1
 ; VEC1_INTERL2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
 ; VEC1_INTERL2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDUCTION2]]
 ; VEC1_INTERL2-NEXT:    store float [[OFFSET_IDX]], float* [[TMP7]], align 4
@@ -1471,8 +1471,8 @@
 ; VEC4_INTERL2-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; VEC4_INTERL2:       vector.body:
 ; VEC4_INTERL2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE17:%.*]] ]
-; VEC4_INTERL2-NEXT:    [[TMP0:%.*]] = or i64 [[INDEX]], 4
-; VEC4_INTERL2-NEXT:    [[TMP1:%.*]] = sitofp i64 [[INDEX]] to float
+; VEC4_INTERL2-NEXT:    [[TMP0:%.*]] = sitofp i64 [[INDEX]] to float
+; VEC4_INTERL2-NEXT:    [[TMP1:%.*]] = or i64 [[INDEX]], 4
 ; VEC4_INTERL2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
 ; VEC4_INTERL2-NEXT:    [[TMP3:%.*]] = bitcast float* [[TMP2]] to <4 x float>*
 ; VEC4_INTERL2-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
@@ -1485,13 +1485,13 @@
 ; VEC4_INTERL2-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; VEC4_INTERL2:       pred.store.if:
 ; VEC4_INTERL2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX]]
-; VEC4_INTERL2-NEXT:    store float [[TMP1]], float* [[TMP9]], align 4
+; VEC4_INTERL2-NEXT:    store float [[TMP0]], float* [[TMP9]], align 4
 ; VEC4_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; VEC4_INTERL2:       pred.store.continue:
 ; VEC4_INTERL2-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP6]], i64 1
 ; VEC4_INTERL2-NEXT:    br i1 [[TMP10]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
 ; VEC4_INTERL2:       pred.store.if4:
-; VEC4_INTERL2-NEXT:    [[TMP11:%.*]] = fadd fast float [[TMP1]], 1.000000e+00
+; VEC4_INTERL2-NEXT:    [[TMP11:%.*]] = fadd fast float [[TMP0]], 1.000000e+00
 ; VEC4_INTERL2-NEXT:    [[TMP12:%.*]] = or i64 [[INDEX]], 1
 ; VEC4_INTERL2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP12]]
 ; VEC4_INTERL2-NEXT:    store float [[TMP11]], float* [[TMP13]], align 4
@@ -1500,7 +1500,7 @@
 ; VEC4_INTERL2-NEXT:    [[TMP14:%.*]] = extractelement <4 x i1> [[TMP6]], i64 2
 ; VEC4_INTERL2-NEXT:    br i1 [[TMP14]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]]
 ; VEC4_INTERL2:       pred.store.if6:
-; VEC4_INTERL2-NEXT:    [[TMP15:%.*]] = fadd fast float [[TMP1]], 2.000000e+00
+; VEC4_INTERL2-NEXT:    [[TMP15:%.*]] = fadd fast float [[TMP0]], 2.000000e+00
 ; VEC4_INTERL2-NEXT:    [[TMP16:%.*]] = or i64 [[INDEX]], 2
 ; VEC4_INTERL2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP16]]
 ; VEC4_INTERL2-NEXT:    store float [[TMP15]], float* [[TMP17]], align 4
@@ -1509,7 +1509,7 @@
 ; VEC4_INTERL2-NEXT:    [[TMP18:%.*]] = extractelement <4 x i1> [[TMP6]], i64 3
 ; VEC4_INTERL2-NEXT:    br i1 [[TMP18]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9:%.*]]
 ; VEC4_INTERL2:       pred.store.if8:
-; VEC4_INTERL2-NEXT:    [[TMP19:%.*]] = fadd fast float [[TMP1]], 3.000000e+00
+; VEC4_INTERL2-NEXT:    [[TMP19:%.*]] = fadd fast float [[TMP0]], 3.000000e+00
 ; VEC4_INTERL2-NEXT:    [[TMP20:%.*]] = or i64 [[INDEX]], 3
 ; VEC4_INTERL2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP20]]
 ; VEC4_INTERL2-NEXT:    store float [[TMP19]], float* [[TMP21]], align 4
@@ -1518,15 +1518,15 @@
 ; VEC4_INTERL2-NEXT:    [[TMP22:%.*]] = extractelement <4 x i1> [[TMP7]], i64 0
 ; VEC4_INTERL2-NEXT:    br i1 [[TMP22]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]]
 ; VEC4_INTERL2:       pred.store.if10:
-; VEC4_INTERL2-NEXT:    [[TMP23:%.*]] = fadd fast float [[TMP1]], 4.000000e+00
-; VEC4_INTERL2-NEXT:    [[TMP24:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP0]]
+; VEC4_INTERL2-NEXT:    [[TMP23:%.*]] = fadd fast float [[TMP0]], 4.000000e+00
+; VEC4_INTERL2-NEXT:    [[TMP24:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP1]]
 ; VEC4_INTERL2-NEXT:    store float [[TMP23]], float* [[TMP24]], align 4
 ; VEC4_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE11]]
 ; VEC4_INTERL2:       pred.store.continue11:
 ; VEC4_INTERL2-NEXT:    [[TMP25:%.*]] = extractelement <4 x i1> [[TMP7]], i64 1
 ; VEC4_INTERL2-NEXT:    br i1 [[TMP25]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]]
 ; VEC4_INTERL2:       pred.store.if12:
-; VEC4_INTERL2-NEXT:    [[TMP26:%.*]] = fadd fast float [[TMP1]], 5.000000e+00
+; VEC4_INTERL2-NEXT:    [[TMP26:%.*]] = fadd fast float [[TMP0]], 5.000000e+00
 ; VEC4_INTERL2-NEXT:    [[TMP27:%.*]] = or i64 [[INDEX]], 5
 ; VEC4_INTERL2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP27]]
 ; VEC4_INTERL2-NEXT:    store float [[TMP26]], float* [[TMP28]], align 4
@@ -1535,7 +1535,7 @@
 ; VEC4_INTERL2-NEXT:    [[TMP29:%.*]] = extractelement <4 x i1> [[TMP7]], i64 2
 ; VEC4_INTERL2-NEXT:    br i1 [[TMP29]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15:%.*]]
 ; VEC4_INTERL2:       pred.store.if14:
-; VEC4_INTERL2-NEXT:    [[TMP30:%.*]] = fadd fast float [[TMP1]], 6.000000e+00
+; VEC4_INTERL2-NEXT:    [[TMP30:%.*]] = fadd fast float [[TMP0]], 6.000000e+00
 ; VEC4_INTERL2-NEXT:    [[TMP31:%.*]] = or i64 [[INDEX]], 6
 ; VEC4_INTERL2-NEXT:    [[TMP32:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP31]]
 ; VEC4_INTERL2-NEXT:    store float [[TMP30]], float* [[TMP32]], align 4
@@ -1544,7 +1544,7 @@
 ; VEC4_INTERL2-NEXT:    [[TMP33:%.*]] = extractelement <4 x i1> [[TMP7]], i64 3
 ; VEC4_INTERL2-NEXT:    br i1 [[TMP33]], label [[PRED_STORE_IF16:%.*]], label [[PRED_STORE_CONTINUE17]]
 ; VEC4_INTERL2:       pred.store.if16:
-; VEC4_INTERL2-NEXT:    [[TMP34:%.*]] = fadd fast float [[TMP1]], 7.000000e+00
+; VEC4_INTERL2-NEXT:    [[TMP34:%.*]] = fadd fast float [[TMP0]], 7.000000e+00
 ; VEC4_INTERL2-NEXT:    [[TMP35:%.*]] = or i64 [[INDEX]], 7
 ; VEC4_INTERL2-NEXT:    [[TMP36:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP35]]
 ; VEC4_INTERL2-NEXT:    store float [[TMP34]], float* [[TMP36]], align 4
@@ -1588,9 +1588,9 @@
 ; VEC1_INTERL2-NEXT:    [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float
 ; VEC1_INTERL2-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; VEC1_INTERL2:       vector.body:
-; VEC1_INTERL2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE5:%.*]] ]
-; VEC1_INTERL2-NEXT:    [[INDUCTION2:%.*]] = or i64 [[INDEX]], 1
+; VEC1_INTERL2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ]
 ; VEC1_INTERL2-NEXT:    [[TMP0:%.*]] = sitofp i64 [[INDEX]] to float
+; VEC1_INTERL2-NEXT:    [[INDUCTION2:%.*]] = or i64 [[INDEX]], 1
 ; VEC1_INTERL2-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
 ; VEC1_INTERL2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDUCTION2]]
 ; VEC1_INTERL2-NEXT:    [[TMP3:%.*]] = load float, float* [[TMP1]], align 4
@@ -1602,12 +1602,12 @@
 ; VEC1_INTERL2-NEXT:    store float [[TMP0]], float* [[TMP1]], align 4
 ; VEC1_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; VEC1_INTERL2:       pred.store.continue:
-; VEC1_INTERL2-NEXT:    br i1 [[TMP6]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5]]
-; VEC1_INTERL2:       pred.store.if6:
+; VEC1_INTERL2-NEXT:    br i1 [[TMP6]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4]]
+; VEC1_INTERL2:       pred.store.if3:
 ; VEC1_INTERL2-NEXT:    [[TMP7:%.*]] = fadd fast float [[TMP0]], 1.000000e+00
 ; VEC1_INTERL2-NEXT:    store float [[TMP7]], float* [[TMP2]], align 4
-; VEC1_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE5]]
-; VEC1_INTERL2:       pred.store.continue7:
+; VEC1_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE4]]
+; VEC1_INTERL2:       pred.store.continue4:
 ; VEC1_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; VEC1_INTERL2-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; VEC1_INTERL2-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
--- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
@@ -463,14 +463,14 @@
 ; UNROLL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
 ; UNROLL-NEXT:    br i1 [[C:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE6]]
 ; UNROLL:       pred.store.if:
-; UNROLL-NEXT:    [[INDUCTION:%.*]] = add i64 [[INDEX]], 0
-; UNROLL-NEXT:    [[TMP0:%.*]] = getelementptr i8, i8* undef, i64 [[INDUCTION]]
+; UNROLL-NEXT:    [[INDUCTION3:%.*]] = add i64 [[INDEX]], 0
+; UNROLL-NEXT:    [[TMP0:%.*]] = getelementptr i8, i8* undef, i64 [[INDUCTION3]]
 ; UNROLL-NEXT:    [[TMP1:%.*]] = load i8, i8* [[TMP0]], align 1
 ; UNROLL-NEXT:    [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
 ; UNROLL-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8
 ; UNROLL-NEXT:    store i8 [[TMP3]], i8* [[TMP0]], align 1
-; UNROLL-NEXT:    [[INDUCTION2:%.*]] = add i64 [[INDEX]], 1
-; UNROLL-NEXT:    [[TMP4:%.*]] = getelementptr i8, i8* undef, i64 [[INDUCTION2]]
+; UNROLL-NEXT:    [[INDUCTION4:%.*]] = add i64 [[INDEX]], 1
+; UNROLL-NEXT:    [[TMP4:%.*]] = getelementptr i8, i8* undef, i64 [[INDUCTION4]]
 ; UNROLL-NEXT:    [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 1
 ; UNROLL-NEXT:    [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
 ; UNROLL-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i8
@@ -511,8 +511,8 @@
 ; UNROLL-NOSIMPLIFY-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
 ; UNROLL-NOSIMPLIFY-NEXT:    br i1 [[C:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; UNROLL-NOSIMPLIFY:       pred.store.if:
-; UNROLL-NOSIMPLIFY-NEXT:    [[INDUCTION:%.*]] = add i64 [[INDEX]], 0
-; UNROLL-NOSIMPLIFY-NEXT:    [[TMP0:%.*]] = getelementptr i8, i8* undef, i64 [[INDUCTION]]
+; UNROLL-NOSIMPLIFY-NEXT:    [[INDUCTION3:%.*]] = add i64 [[INDEX]], 0
+; UNROLL-NOSIMPLIFY-NEXT:    [[TMP0:%.*]] = getelementptr i8, i8* undef, i64 [[INDUCTION3]]
 ; UNROLL-NOSIMPLIFY-NEXT:    [[TMP1:%.*]] = load i8, i8* [[TMP0]], align 1
 ; UNROLL-NOSIMPLIFY-NEXT:    [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
 ; UNROLL-NOSIMPLIFY-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8
diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll
--- a/llvm/test/Transforms/LoopVectorize/induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction.ll
@@ -159,9 +159,9 @@
 ; UNROLL-NO-IC:       vector.body:
 ; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]],
 ; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 0
 ; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 2
-; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]],
 ; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP3]]
 ; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP4]]
 ; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0
@@ -5202,9 +5202,9 @@
 ; UNROLL-NO-IC:       vector.body:
 ; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[VEC_IND3:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[VECTOR_BODY]] ]
+; UNROLL-NO-IC-NEXT:    [[STEP_ADD4:%.*]] = add <2 x i32> [[VEC_IND3]],
 ; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 2
-; UNROLL-NO-IC-NEXT:    [[STEP_ADD4:%.*]] = add <2 x i32> [[VEC_IND3]],
 ; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP0]]
 ; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP1]]
 ; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 0
@@ -6716,9 +6716,9 @@
 ; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; UNROLL-NO-IC-NEXT:    [[STEP_ADD]] = add <2 x i32> [[VEC_IND]], [[DOTSPLAT3]]
 ; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = add i64 [[INDEX]], 0
 ; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = add i64 [[INDEX]], 2
-; UNROLL-NO-IC-NEXT:    [[STEP_ADD]] = add <2 x i32> [[VEC_IND]], [[DOTSPLAT3]]
 ; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND]], <2 x i32> <i32 1, i32 2>
 ; UNROLL-NO-IC-NEXT:    [[TMP24:%.*]] = shufflevector <2 x i32> [[VEC_IND]], <2 x i32> [[STEP_ADD]], <2 x i32> <i32 1, i32 2>
 ; UNROLL-NO-IC-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 [[TMP21]]
diff --git a/llvm/test/Transforms/LoopVectorize/induction_plus.ll b/llvm/test/Transforms/LoopVectorize/induction_plus.ll
--- a/llvm/test/Transforms/LoopVectorize/induction_plus.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction_plus.ll
@@ -7,15 +7,13 @@
 define void @array_at_plus_one(i32 %n) {
 ; CHECK-LABEL: @array_at_plus_one(
 ; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
-; CHECK-NEXT: [[VEC_IV_1:%.+]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %vector.ph ], [ [[VEC_IV_1_NEXT:%.+]], %vector.body ]
-; CHECK-NEXT: [[VEC_IV_TRUNC:%.+]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %vector.ph ], [ [[VEC_IV_TRUNC_NEXT:%.+]], %vector.body ]
+; CHECK: [[VEC_IV_TRUNC:%.+]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %vector.ph ], [ [[VEC_IV_TRUNC_NEXT:%.+]], %vector.body ]
 ; CHECK: [[T1:%.+]] = add i64 %index, 0
 ; CHECK: [[T2:%.+]] = add nsw i64 [[T1]], 12
 ; CHECK-NEXT: [[GEP:%.+]] = getelementptr inbounds [1024 x i32], [1024 x i32]* @array, i64 0, i64 [[T2]]
 ; CHECK-NEXT: [[GEP0:%.+]] = getelementptr inbounds i32, i32* [[GEP]], i32 0
 ; CHECK-NEXT: [[BC:%.+]] = bitcast i32* [[GEP0]] to <4 x i32>*
 ; CHECK-NEXT: store <4 x i32> [[VEC_IV_TRUNC]], <4 x i32>* [[BC]]
-; CHECK: [[VEC_IV_1_NEXT]] = add <4 x i64> [[VEC_IV_1]], <i64 4, i64 4, i64 4, i64 4>
 ; CHECK: [[VEC_IV_TRUNC_NEXT]] = add <4 x i32> [[VEC_IV_TRUNC]], <i32 4, i32 4, i32 4, i32 4>
 ; CHECK: ret void
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll
--- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll
@@ -186,10 +186,10 @@
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32
-; CHECK-NEXT:    [[TMP13:%.*]] = add i32 [[OFFSET_IDX]], 0
-; CHECK-NEXT:    [[TMP14:%.*]] = xor i32 [[TMP13]], -1
+; CHECK-NEXT:    [[TMP12:%.*]] = add i32 [[OFFSET_IDX]], 0
+; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP14:%.*]] = xor i32 [[TMP12]], -1
 ; CHECK-NEXT:    [[TMP15:%.*]] = add i32 [[TMP14]], [[N]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = sext i32 [[TMP15]] to i64
 ; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[TMP16]]
@@ -199,7 +199,7 @@
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP20]], align 4
 ; CHECK-NEXT:    [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    [[TMP21:%.*]] = fadd fast <4 x float> [[REVERSE]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
-; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[TMP12]]
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[TMP13]]
 ; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds float, float* [[TMP22]], i32 0
 ; CHECK-NEXT:    [[TMP24:%.*]] = bitcast float* [[TMP23]] to <4 x float>*
 ; CHECK-NEXT:    store <4 x float> [[TMP21]], <4 x float>* [[TMP24]], align 4
@@ -222,10 +222,10 @@
 ; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
 ; CHECK:       vec.epilog.vector.body:
 ; CHECK-NEXT:    [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT5:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP26:%.*]] = add i64 [[INDEX4]], 0
 ; CHECK-NEXT:    [[OFFSET_IDX9:%.*]] = trunc i64 [[INDEX4]] to i32
-; CHECK-NEXT:    [[TMP27:%.*]] = add i32 [[OFFSET_IDX9]], 0
-; CHECK-NEXT:    [[TMP28:%.*]] = xor i32 [[TMP27]], -1
+; CHECK-NEXT:    [[TMP26:%.*]] = add i32 [[OFFSET_IDX9]], 0
+; CHECK-NEXT:    [[TMP27:%.*]] = add i64 [[INDEX4]], 0
+; CHECK-NEXT:    [[TMP28:%.*]] = xor i32 [[TMP26]], -1
 ; CHECK-NEXT:    [[TMP29:%.*]] = add i32 [[TMP28]], [[N]]
 ; CHECK-NEXT:    [[TMP30:%.*]] = sext i32 [[TMP29]] to i64
 ; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP30]]
@@ -235,7 +235,7 @@
 ; CHECK-NEXT:    [[WIDE_LOAD10:%.*]] = load <4 x float>, <4 x float>* [[TMP34]], align 4
 ; CHECK-NEXT:    [[REVERSE11:%.*]] = shufflevector <4 x float> [[WIDE_LOAD10]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    [[TMP35:%.*]] = fadd fast <4 x float> [[REVERSE11]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
-; CHECK-NEXT:    [[TMP36:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP26]]
+; CHECK-NEXT:    [[TMP36:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP27]]
 ; CHECK-NEXT:    [[TMP37:%.*]] = getelementptr inbounds float, float* [[TMP36]], i32 0
 ; CHECK-NEXT:    [[TMP38:%.*]] = bitcast float* [[TMP37]] to <4 x float>*
 ; CHECK-NEXT:    store <4 x float> [[TMP35]], <4 x float>* [[TMP38]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
--- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
@@ -151,7 +151,7 @@
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8*, i8** [[NEXT_GEP]], i32 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8** [[TMP4]] to <4 x i8*>*
 ; CHECK-NEXT:    store <4 x i8*> [[TMP3]], <4 x i8*>* [[TMP5]], align 8
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i8*> [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i8*> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, i8* [[TMP6]], i32 0
 ; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to <4 x i8>*
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP8]], align 1
diff --git a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll
--- a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll
@@ -32,9 +32,9 @@
 ; CHECK-LABEL: @reverse_induction_i128(
 ; CHECK: %index = phi i128 [ 0, %vector.ph ], [ %index.next, %vector.body ]
-; CHECK: %offset.idx = sub i128 %startval, %index
-; CHECK: %[[a0:.+]] = add i128 %offset.idx, 0
-; CHECK: %[[a4:.+]] = add i128 %offset.idx, -4
+; CHECK: [[OFFSET_IDX:%.+]] = sub i128 %startval, %index
+; CHECK: %[[a0:.+]] = add i128 [[OFFSET_IDX]], 0
+; CHECK: %[[a4:.+]] = add i128 [[OFFSET_IDX]], -4
 
 define i32 @reverse_induction_i128(i128 %startval, i32 * %ptr) {
 entry:
diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll
--- a/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll
+++ b/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll
@@ -12,18 +12,18 @@
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP9:%.*]] = trunc i64 [[INDEX]] to i32
-; CHECK-NEXT:    [[TMP10:%.*]] = add i32 [[TMP9]], 0
-; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
-; CHECK-NEXT:    [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32
-; CHECK-NEXT:    [[TMP4:%.*]] = mul i32 [[TMP3]], [[X]]
-; CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, float* [[TMP6]], i32 0
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast float* [[TMP7]] to <4 x float>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP8]], align 4
-; CHECK-NEXT:    [[TMP11:%.*]] = mul i32 [[TMP10]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[INDEX]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1
+; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32
+; CHECK-NEXT:    [[TMP6:%.*]] = mul i32 [[TMP5]], [[X]]
+; CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, float* [[TMP8]], i32 0
+; CHECK-NEXT:    [[TMP10:%.*]] = bitcast float* [[TMP9]] to <4 x float>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP10]], align 4
+; CHECK-NEXT:    [[TMP11:%.*]] = mul i32 [[TMP2]], [[X]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = zext i32 [[TMP11]] to i64
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP12]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds float, float* [[TMP13]], i32 0
diff --git a/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll b/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll
--- a/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll
+++ b/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll
@@ -37,7 +37,7 @@
 ; CHECK-NEXT:    store <4 x i8> [[WIDE_LOAD]], <4 x i8>* [[TMP6]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
+; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[SIZE]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]]
@@ -53,7 +53,7 @@
 ; CHECK-NEXT:    [[TMP8:%.*]] = load i8, i8* [[INCDEC_PTR]], align 1
 ; CHECK-NEXT:    store i8 [[TMP8]], i8* [[BUFF]], align 1
 ; CHECK-NEXT:    [[TOBOOL11:%.*]] = icmp eq i32 [[DEC]], 0
-; CHECK-NEXT:    br i1 [[TOBOOL11]], label [[END]], label [[BODY]], !llvm.loop !2
+; CHECK-NEXT:    br i1 [[TOBOOL11]], label [[END]], label [[BODY]], !llvm.loop [[LOOP2:![0-9]+]]
 ; CHECK:       end:
 ; CHECK-NEXT:    [[INCDEC_PTR_LCSSA:%.*]] = phi i8* [ [[INCDEC_PTR]], [[BODY]] ], [ [[IND_END2]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    store i8* [[INCDEC_PTR_LCSSA]], i8** [[POS]], align 4
@@ -103,7 +103,7 @@
 ; CHECK-NEXT:    store <4 x i8> [[WIDE_LOAD]], <4 x i8>* [[TMP6]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !4
+; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[SIZE]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]]
@@ -119,7 +119,7 @@
 ; CHECK-NEXT:    [[TMP8:%.*]] = load i8, i8* [[INCDEC_PTR]], align 1
 ; CHECK-NEXT:    store i8 [[TMP8]], i8* [[BUFF]], align 1
 ; CHECK-NEXT:    [[TOBOOL11:%.*]] = icmp eq i32 [[DEC]], 0
-; CHECK-NEXT:    br i1 [[TOBOOL11]], label [[END]], label [[BODY]], !llvm.loop !5
+; CHECK-NEXT:    br i1 [[TOBOOL11]], label [[END]], label [[BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK:       end:
 ; CHECK-NEXT:    [[INCDEC_PTR_LCSSA:%.*]] = phi i8* [ [[INCDEC_PTR]], [[BODY]] ], [ [[IND_END2]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    store i8* [[INCDEC_PTR_LCSSA]], i8** [[POS]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll
--- a/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll
@@ -13,7 +13,7 @@
 ; CHECK-NEXT:  N1 [label =
 ; CHECK-NEXT:    "for.body:\l" +
 ; CHECK-NEXT:    "  EMIT vp\<[[CAN_IV:%.+]]\> = CANONICAL-INDUCTION\l" +
-; CHECK-NEXT:    "  WIDEN-INDUCTION %iv = phi %iv.next, 0\l" +
+; CHECK-NEXT:    "  ir\<%iv\> = SCALAR-STEPS vp\<[[CAN_IV]]\>, ir\<0\>, ir\<1\>\l" +
 ; CHECK-NEXT:    "  CLONE ir\<%arrayidx\> = getelementptr ir\<%y\>, ir\<%iv\>\l" +
 ; CHECK-NEXT:    "  WIDEN ir\<%lv\> = load ir\<%arrayidx\>\l" +
 ; CHECK-NEXT:    "  WIDEN-CALL ir\<%call\> = call @llvm.sqrt.f32(ir\<%lv\>)\l" +
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
@@ -14,7 +14,7 @@
 ; CHECK-NEXT: vector loop: {
 ; CHECK-NEXT:   for.body:
 ; CHECK-NEXT:     EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
-; CHECK-NEXT:     WIDEN-INDUCTION %iv = phi %iv.next, 0
+; CHECK-NEXT:     SCALAR-STEPS vp<[[CAN_IV]]>, ir<0>, ir<1>
 ; CHECK-NEXT:     CLONE ir<%arrayidx> = getelementptr ir<%y>, ir<%iv>
 ; CHECK-NEXT:     WIDEN ir<%lv> = load ir<%arrayidx>
 ; CHECK-NEXT:     WIDEN-CALL ir<%call> = call @llvm.sqrt.f32(ir<%lv>)
@@ -98,8 +98,8 @@
 ; CHECK-NEXT: vector loop: {
 ; CHECK-NEXT:   for.body:
 ; CHECK-NEXT:     EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
-; CHECK-NEXT:     WIDEN-INDUCTION %iv = phi %iv.next, 0
 ; CHECK-NEXT:     WIDEN-REDUCTION-PHI ir<%red> = phi ir<0.000000e+00>, ir<%red.next>
+; CHECK-NEXT:     SCALAR-STEPS vp<[[CAN_IV]]>, ir<0>, ir<1>
 ; CHECK-NEXT:     CLONE ir<%arrayidx> = getelementptr ir<%y>, ir<%iv>
 ; CHECK-NEXT:     WIDEN ir<%lv> = load ir<%arrayidx>
 ; CHECK-NEXT:     REDUCE ir<%red.next> = ir<%red> + fast reduce.fadd (ir<%lv>)
@@ -208,7 +208,7 @@
 ; CHECK-NEXT: vector loop: {
 ; CHECK-NEXT:   for.body:
 ; CHECK-NEXT:     EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
-; CHECK-NEXT:     WIDEN-INDUCTION %iv = phi 0, %iv.next
+; CHECK-NEXT:     SCALAR-STEPS vp<[[CAN_IV]]>, ir<0>, ir<4>
 ; CHECK-NEXT:     CLONE ir<%gep.AB.0> = getelementptr ir<@AB>, ir<0>, ir<%iv>
 ; CHECK-NEXT:     INTERLEAVE-GROUP with factor 4 at %AB.0, ir<%gep.AB.0>
 ; CHECK-NEXT:       ir<%AB.0> = load from index 0
@@ -268,8 +268,8 @@
 ; CHECK-NEXT: vector loop: {
 ; CHECK-NEXT:   for.body:
 ; CHECK-NEXT:     EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
-; CHECK-NEXT:     WIDEN-INDUCTION %iv = phi 0, %iv.next
 ; CHECK-NEXT:     WIDEN-REDUCTION-PHI ir<%sum.07> = phi ir<0.000000e+00>, ir<%muladd>
+; CHECK-NEXT:     SCALAR-STEPS vp<[[CAN_IV]]>, ir<0>, ir<1>
 ; CHECK-NEXT:     CLONE ir<%arrayidx> = getelementptr ir<%a>, ir<%iv>
 ; CHECK-NEXT:     WIDEN ir<%l.a> = load ir<%arrayidx>
 ; CHECK-NEXT:     CLONE ir<%arrayidx2> = getelementptr ir<%b>, ir<%iv>
@@ -308,7 +308,7 @@
 ; CHECK-NEXT: vector loop: {
 ; CHECK-NEXT:   loop:
 ; CHECK-NEXT:     EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
-; CHECK-NEXT:     WIDEN-INDUCTION %iv = phi 0, %iv.next
+; CHECK-NEXT:     SCALAR-STEPS vp<[[CAN_IV]]>, ir<0>, ir<1>
 ; CHECK-NEXT:     CLONE ir<%isd> = getelementptr ir<%asd>, ir<%iv>
 ; CHECK-NEXT:     WIDEN ir<%lsd> = load ir<%isd>
 ; CHECK-NEXT:     WIDEN ir<%psd> = add ir<%lsd>, ir<23>
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll
--- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll
@@ -13,7 +13,7 @@
 ; CHECK-NEXT: vector loop: {
 ; CHECK-NEXT:   for.body:
 ; CHECK-NEXT:     EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
-; CHECK-NEXT:     WIDEN-INDUCTION %tmp0 = phi %tmp6, 0
+; CHECK-NEXT:     ir<%tmp0> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<0>, ir<1>
 ; CHECK-NEXT:     CLONE ir<%tmp2> = getelementptr ir<%ptr>, ir<%tmp0>
 ; CHECK-NEXT:     CLONE ir<%tmp3> = load ir<%tmp2>
 ; CHECK-NEXT:     CLONE store ir<0>, ir<%tmp2>