diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1071,7 +1071,7 @@
   return VF.isScalable() ? B.CreateVScale(EC) : EC;
 }
 
-static Value *getRuntimeVFAsFloat(IRBuilder<> &B, Type *FTy, ElementCount VF) {
+Value *getRuntimeVFAsFloat(IRBuilder<> &B, Type *FTy, ElementCount VF) {
   assert(FTy->isFloatingPointTy() && "Expected floating point type!");
   Type *IntTy = IntegerType::get(FTy->getContext(), FTy->getScalarSizeInBits());
   Value *RuntimeVF = getRuntimeVF(B, IntTy, VF);
@@ -2322,26 +2322,15 @@
   return Shuf;
 }
 
+namespace llvm {
 /// This function adds
 /// (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step, ...)
 /// to each vector element of Val. The sequence starts at StartIdx.
 /// \p BinOp is relevant for FP induction variables.
-static Value *getStepVector(Value *Val, Value *StartIdx, Value *Step,
-                            Instruction::BinaryOps BinOp, ElementCount VF,
-                            IRBuilder<> &Builder) {
-  if (VF.isScalar()) {
-    // When unrolling and the VF is 1, we only need to add a simple scalar.
-    Type *Ty = Val->getType();
-    assert(!Ty->isVectorTy() && "Val must be a scalar");
-
-    if (Ty->isFloatingPointTy()) {
-      // Floating-point operations inherit FMF via the builder's flags.
-      Value *MulOp = Builder.CreateFMul(StartIdx, Step);
-      return Builder.CreateBinOp(BinOp, Val, MulOp);
-    }
-    return Builder.CreateAdd(Val, Builder.CreateMul(StartIdx, Step),
-                             "induction");
-  }
+Value *getStepVector(Value *Val, Value *StartIdx, Value *Step,
+                     Instruction::BinaryOps BinOp, ElementCount VF,
+                     IRBuilder<> &Builder) {
+  assert(VF.isVector() && "should not be called with scalar VFs");
 
   // Create and check the types.
   auto *ValVTy = cast<VectorType>(Val->getType());
@@ -2387,6 +2376,7 @@
   Value *MulOp = Builder.CreateFMul(InitVec, Step);
   return Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
 }
+} // namespace llvm
 
 void InnerLoopVectorizer::createVectorIntOrFpInductionPHI(
     const InductionDescriptor &II, Value *Step, Value *Start,
@@ -2546,28 +2536,6 @@
     return ScalarIV;
   };
 
-  // Create the vector values from the scalar IV, in the absence of creating a
-  // vector IV.
-  auto CreateSplatIV = [&](Value *ScalarIV, Value *Step) {
-    Value *Broadcasted = getBroadcastInstrs(ScalarIV);
-    for (unsigned Part = 0; Part < UF; ++Part) {
-      assert(!State.VF.isScalable() && "scalable vectors not yet supported.");
-      Value *StartIdx;
-      if (Step->getType()->isFloatingPointTy())
-        StartIdx =
-            getRuntimeVFAsFloat(Builder, Step->getType(), State.VF * Part);
-      else
-        StartIdx = getRuntimeVF(Builder, Step->getType(), State.VF * Part);
-
-      Value *EntryPart =
-          getStepVector(Broadcasted, StartIdx, Step, ID.getInductionOpcode(),
-                        State.VF, State.Builder);
-      State.set(Def, EntryPart, Part);
-      if (Trunc)
-        addMetadata(EntryPart, Trunc);
-    }
-  };
-
   // Fast-math-flags propagate from the original induction instruction.
   IRBuilder<>::FastMathFlagGuard FMFG(Builder);
   if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
@@ -2576,41 +2544,46 @@
   // Now do the actual transformations, and start with creating the step value.
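   // Illustrative note (an editor's assumption, not taken verbatim from the
   // patch): for a scalar VF the loop added below computes one value per
   // unrolled part, i.e. with UF = 2 and an integer Step it produces
   //   Part 0: ScalarIV + 0 * Step
   //   Part 1: ScalarIV + 1 * Step
   // so each part sees the scalar IV advanced by Part * Step.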
   Value *Step = CreateStepValue(ID.getStep());
   if (State.VF.isScalar()) {
+    assert(!State.VF.isScalable() && "scalable vectors not yet supported.");
     Value *ScalarIV = CreateScalarIV(Step);
-    CreateSplatIV(ScalarIV, Step);
-    return;
-  }
+    for (unsigned Part = 0; Part < UF; ++Part) {
+      Value *EntryPart;
+      Value *StartIdx = ConstantInt::get(
+          IntegerType::get(ScalarIV->getContext(),
+                           Step->getType()->getScalarSizeInBits()),
+          Part);
+      if (Step->getType()->isFloatingPointTy()) {
+        StartIdx = Builder.CreateUIToFP(StartIdx, Step->getType());
+        // Floating-point operations inherit FMF via the builder's flags.
+        Value *MulOp = Builder.CreateFMul(StartIdx, Step);
+        EntryPart =
+            Builder.CreateBinOp(ID.getInductionOpcode(), ScalarIV, MulOp);
+      } else {
+        EntryPart = Builder.CreateAdd(
+            ScalarIV, Builder.CreateMul(StartIdx, Step), "induction");
+      }
+      State.set(Def, EntryPart, Part);
+      if (Trunc)
+        addMetadata(EntryPart, Trunc);
+    }
-  // Determine if we want a scalar version of the induction variable. This is
-  // true if the induction variable itself is not widened, or if it has at
-  // least one user in the loop that is not widened.
-  auto NeedsScalarIV = needsScalarInduction(EntryVal);
-  if (!NeedsScalarIV) {
-    createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, State);
     return;
   }
 
-  // Try to create a new independent vector induction variable. If we can't
-  // create the phi node, we will splat the scalar induction variable in each
-  // loop iteration.
-  if (!shouldScalarizeInstruction(EntryVal)) {
+  bool NeedsVectorIV = !shouldScalarizeInstruction(EntryVal);
+  // Create a new independent vector induction variable, if one is needed.
+  if (NeedsVectorIV)
     createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, State);
-    Value *ScalarIV = CreateScalarIV(Step);
+
+  bool NeedsScalarIV = needsScalarInduction(EntryVal);
+  if (NeedsScalarIV) {
     // Create scalar steps that can be used by instructions we will later
     // scalarize. Note that the addition of the scalar steps will not increase
     // the number of instructions in the loop in the common case prior to
     // InstCombine. We will be trading one vector extract for each scalar step.
+    Value *ScalarIV = CreateScalarIV(Step);
     buildScalarSteps(ScalarIV, Step, EntryVal, ID, Def, State);
-    return;
   }
-
-  // All IV users are scalar instructions, so only emit a scalar IV, not a
-  // vectorised IV. Except when we tail-fold, then the splat IV feeds the
-  // predicate used by the masked loads/stores.
-  Value *ScalarIV = CreateScalarIV(Step);
-  if (!Cost->isScalarEpilogueAllowed())
-    CreateSplatIV(ScalarIV, Step);
-  buildScalarSteps(ScalarIV, Step, EntryVal, ID, Def, State);
 }
 
 void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
@@ -8396,15 +8369,8 @@
   assert(CM.foldTailByMasking() && "must fold the tail");
   VPBasicBlock *HeaderVPBB = Plan->getEntry()->getEntryBasicBlock();
   auto NewInsertionPoint = HeaderVPBB->getFirstNonPhi();
-
-  VPValue *IV = nullptr;
-  if (Legal->getPrimaryInduction())
-    IV = Plan->getOrAddVPValue(Legal->getPrimaryInduction());
-  else {
-    auto *IVRecipe = new VPWidenCanonicalIVRecipe();
-    HeaderVPBB->insert(IVRecipe, NewInsertionPoint);
-    IV = IVRecipe;
-  }
+  auto *IV = new VPWidenCanonicalIVRecipe();
+  HeaderVPBB->insert(IV, HeaderVPBB->getFirstNonPhi());
 
   VPBuilder::InsertPointGuard Guard(Builder);
   Builder.setInsertPoint(HeaderVPBB, NewInsertionPoint);
@@ -9175,6 +9141,9 @@
     }
   }
 
+  // Remove redundant widened canonical IV recipes, if possible.
+  VPlanTransforms::removeRedundantVPWidenCanonicalIVRecipe(
+      *Plan, CM.foldTailByMasking(), Legal->getPrimaryInduction(), Range);
   VPlanTransforms::removeRedundantInductionCasts(*Plan);
 
   // Now that sink-after is done, move induction recipes for optimized truncates
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -72,6 +72,16 @@
 /// Return a value for Step multiplied by VF.
 Value *createStepForVF(IRBuilder<> &B, Type *Ty, ElementCount VF, int64_t Step);
 
+Value *getRuntimeVFAsFloat(IRBuilder<> &B, Type *FTy, ElementCount VF);
+
+/// This function adds
+/// (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step, ...)
+/// to each vector element of Val. The sequence starts at StartIdx.
+/// \p BinOp is relevant for FP induction variables.
+Value *getStepVector(Value *Val, Value *StartIdx, Value *Step,
+                     Instruction::BinaryOps BinOp, ElementCount VF,
+                     IRBuilder<> &Builder);
+
 /// A range of powers-of-2 vectorization factors with fixed start and
 /// adjustable end. The range includes start and excludes end, e.g.,:
 /// [1, 9) = {1, 2, 4, 8}
@@ -796,6 +806,7 @@
     CanonicalIVIncrement,
     CanonicalIVIncrementNUW,
     ExitCheckAndBranch,
+    StepVector,
   };
 
 private:
@@ -836,6 +847,15 @@
     return R->getVPDefID() == VPRecipeBase::VPInstructionSC;
   }
 
+  static inline bool classof(const VPUser *U) {
+    auto *R = dyn_cast<VPRecipeBase>(U);
+    return R && R->getVPDefID() == VPRecipeBase::VPInstructionSC;
+  }
+
+  static inline bool classof(const VPRecipeBase *R) {
+    return R->getVPDefID() == VPRecipeBase::VPInstructionSC;
+  }
+
   unsigned getOpcode() const { return Opcode; }
 
   /// Generate the instruction.
@@ -2724,6 +2744,7 @@
   /// Return true if all visited instruction can be combined.
   bool isCompletelySLP() const { return CompletelySLP; }
 };
+
 } // end namespace llvm
 
 #endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -747,6 +747,27 @@
     }
     break;
   }
+  case VPInstruction::StepVector: {
+    assert(!State.VF.isScalable() && "scalable vectors not yet supported.");
+    Value *Val = State.get(getOperand(0), VPIteration(0, 0));
+    Value *Broadcasted =
+        State.Builder.CreateVectorSplat(State.VF, Val, "broadcast");
+
+    for (unsigned Part = 0; Part < State.UF; ++Part) {
+      assert(Val == State.get(getOperand(0), VPIteration(Part, 0)) &&
+             "Val changed for Part");
+      Value *Step = State.get(getOperand(1), VPIteration(Part, 0));
+      assert(Step->getType()->isIntegerTy() &&
+             "stepvector only supports integer steps for now");
+      Value *StartIdx =
+          getRuntimeVF(State.Builder, Step->getType(), State.VF * Part);
+      Value *EntryPart =
+          getStepVector(Broadcasted, StartIdx, Step, Instruction::Add, State.VF,
+                        State.Builder);
+      State.set(this, EntryPart, Part);
+    }
+    break;
+  }
   default:
     llvm_unreachable("Unsupported opcode for instruction");
   }
@@ -803,6 +824,9 @@
   case VPInstruction::ExitCheckAndBranch:
     O << "exit-check-and-branch ";
     break;
+  case VPInstruction::StepVector:
+    O << "step-vector";
+    break;
   default:
     O << Instruction::getOpcodeName(getOpcode());
   }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -45,6 +45,11 @@
   /// in the vectorized loop.
  /// There is no need to vectorize the cast - the same value can be used
   /// for both the phi and casts in the vector loop.
   static void removeRedundantInductionCasts(VPlan &Plan);
+
+  /// Try to replace VPWidenCanonicalIVRecipes with an equivalent existing
+  /// vector induction or a step-vector based on the canonical scalar induction.
+  static void removeRedundantVPWidenCanonicalIVRecipe(
+      VPlan &Plan, bool FoldTail, Value *OriginalCanonicalIV, VFRange &Range);
 };
 
 } // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -324,3 +324,84 @@
     E.first->eraseFromParent();
   }
 }
+
+void VPlanTransforms::removeRedundantVPWidenCanonicalIVRecipe(
+    VPlan &Plan, bool FoldTail, Value *OriginalCanonicalIV, VFRange &Range) {
+  if (!OriginalCanonicalIV)
+    return;
+
+  VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
+  VPWidenCanonicalIVRecipe *WidenCan = nullptr;
+  VPWidenIntOrFpInductionRecipe *WidenInduction = nullptr;
+  for (VPRecipeBase &Phi : *HeaderVPBB) {
+    if (auto *W = dyn_cast<VPWidenCanonicalIVRecipe>(&Phi)) {
+      WidenCan = W;
+      continue;
+    }
+    auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
+    if (!IV)
+      continue;
+    // If the induction recipe is for the primary induction and a vector phi
+    // will be generated, use it directly instead of building a step-vector
+    // for the canonical IV.
+    if (IV->getUnderlyingValue() == OriginalCanonicalIV)
+      WidenInduction = IV;
+  }
+
+  if (!WidenCan)
+    return;
+
+  // Use a separate scalar plan.
+  if (Range.Start.isScalar())
+    Range.End = Range.Start * 2;
+
+  auto AllUsersScalar = [](VPWidenIntOrFpInductionRecipe *R) {
+    return all_of(R->users(), [](VPUser *U) {
+      auto *R = cast<VPRecipeBase>(U);
+      return isa<VPReplicateRecipe>(R);
+    });
+  };
+
+  auto AllUsersUniform = [](VPWidenIntOrFpInductionRecipe *R) {
+    return all_of(R->users(), [](VPUser *U) {
+      auto *R = cast<VPRecipeBase>(U);
+      return isa<VPReplicateRecipe>(R) &&
+             cast<VPReplicateRecipe>(R)->isUniform();
+    });
+  };
+
+  assert(WidenInduction && "expected recipe for original canonical IV");
+
+  // For scalar vectorization factors or in case a vector phi will be generated
+  // for WidenInduction, use it instead of the widened canonical IV. To check
+  // whether a vector phi will be generated, check for uniform or scalar users
+  // respectively. If the only users of the widened canonical induction are
+  // ActiveLaneMasks, only scalars are needed and WidenInduction can be used
+  // unconditionally.
+  // TODO: Remove checks once a vector phi will be generated for all
+  // VPWidenIntOrFpInductionRecipes.
+  if (Range.Start.isScalar() ||
+      (FoldTail && !AllUsersUniform(WidenInduction)) ||
+      !AllUsersScalar(WidenInduction) ||
+      all_of(WidenCan->users(), [](VPUser *U) {
+        auto *R = dyn_cast<VPRecipeBase>(U);
+        auto *VPI = dyn_cast_or_null<VPInstruction>(R);
+        return !VPI || VPI->getOpcode() == VPInstruction::ActiveLaneMask;
+      })) {
+    WidenCan->replaceAllUsesWith(WidenInduction);
+    WidenCan->eraseFromParent();
+    return;
+  }
+  // Retain WidenCan for scalar plans.
+  if (Range.Start.isScalar())
+    return;
+
+  auto *One = ConstantInt::get(OriginalCanonicalIV->getType(), 1);
+  auto *OneVPV = new VPValue(One);
+  Plan.addExternalDef(OneVPV);
+  auto *CanIV = Plan.getCanonicalIV();
+  auto *R = new VPInstruction(VPInstruction::StepVector, {CanIV, OneVPV});
+  HeaderVPBB->insert(R, HeaderVPBB->getFirstNonPhi());
+  WidenCan->replaceAllUsesWith(R);
+  WidenCan->eraseFromParent();
+}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll b/llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll
@@ -11,10 +11,10 @@
 ; CHECK-LABEL: @uniform_load(
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[IDX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[IDX_NEXT:%.*]], %vector.body ]
+; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[IDX]], 0
 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> poison, i64 [[IDX]], i32 0
 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[TMP2]],
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[IDX]], 0
 ; CHECK-NEXT: [[LOOP_PRED:%.*]] = icmp ule <4 x i64> [[INDUCTION]]
 ; CHECK-NEXT: [[LOAD_VAL:%.*]] = load i32, i32* %src, align 4
 ; CHECK-NOT: load i32, i32* %src, align 4
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll
@@ -14,9 +14,6 @@
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]],
 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]])
 ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw i32 [[TMP0]], 1
@@ -357,9 +354,6 @@
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]],
 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 0
 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP1]], i32 [[N]])
 ; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i32 [[TMP1]], [[STRIDE]]
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll
@@ -19,9 +19,6 @@
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[INDEX]], i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[INDUCTION:%.*]] = add <16 x i32> [[BROADCAST_SPLAT]],
 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 [[TMP0]], i32 [[N]])
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i32 [[TMP0]]
@@ -107,9 +104,6 @@
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[INDEX]], i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[INDUCTION:%.*]] = add <16 x i32> [[BROADCAST_SPLAT]],
 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 [[TMP0]], i32 [[N]])
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i32 [[TMP0]]
@@ -195,9 +189,6 @@
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]],
 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]])
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[TMP0]]
@@ -266,9 +257,6 @@
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]],
 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]])
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[TMP0]]
@@ -337,9 +325,6 @@
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]],
 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]])
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[TMP0]]
@@ -408,9 +393,6 @@
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]],
 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]])
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[TMP0]]
@@ -479,9 +461,6 @@
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]],
 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]])
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[TMP0]]
@@ -550,9 +529,6 @@
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]],
 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]])
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i32 [[TMP0]]
@@ -621,9 +597,6 @@
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ , [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]],
 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]])
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i32 [[TMP0]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll
--- a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll
@@ -19,9 +19,6 @@
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[OFFSET_IDX]], i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i16> [[BROADCAST_SPLAT]],
 ; CHECK-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0
 ; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[TMP0]] to i64
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [2 x i16*], [2 x i16*]* @b, i16 0, i64 [[TMP1]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/optsize.ll b/llvm/test/Transforms/LoopVectorize/X86/optsize.ll
--- a/llvm/test/Transforms/LoopVectorize/X86/optsize.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/optsize.ll
@@ -17,10 +17,10 @@
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <64 x i32> poison, i32 [[INDEX]], i32 0
 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <64 x i32> [[BROADCAST_SPLATINSERT]], <64 x i32> poison, <64 x i32> zeroinitializer
 ; CHECK-NEXT: [[INDUCTION:%.*]] = add <64 x i32> [[BROADCAST_SPLAT]],
-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <64 x i32> [[INDUCTION]],
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 [[TMP0]]
 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 0
@@ -32,7 +32,7 @@
 ; CHECK-NEXT: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> [[TMP6]], <64 x i8>* [[TMP7]], i32 1, <64 x i1> [[TMP1]])
 ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 64
 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
-; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
+; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; CHECK: scalar.ph:
@@ -58,10 +58,10 @@
 ; AUTOVF-NEXT: br label [[VECTOR_BODY:%.*]]
 ; AUTOVF: vector.body:
 ; AUTOVF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; AUTOVF-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
 ; AUTOVF-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <32 x i32> poison, i32 [[INDEX]], i32 0
 ; AUTOVF-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <32 x i32> [[BROADCAST_SPLATINSERT]], <32 x i32> poison, <32 x i32> zeroinitializer
 ; AUTOVF-NEXT: [[INDUCTION:%.*]] = add <32 x i32> [[BROADCAST_SPLAT]],
-; AUTOVF-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
 ; AUTOVF-NEXT: [[TMP1:%.*]] = icmp ule <32 x i32> [[INDUCTION]],
 ; AUTOVF-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 [[TMP0]]
 ; AUTOVF-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 0
@@ -73,7 +73,7 @@
 ; AUTOVF-NEXT: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> [[TMP6]], <32 x i8>* [[TMP7]], i32 1, <32 x i1> [[TMP1]])
 ; AUTOVF-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 32
 ; AUTOVF-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 224
-; AUTOVF-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
+; AUTOVF-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; AUTOVF: middle.block:
 ; AUTOVF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; AUTOVF: scalar.ph:
@@ -121,10 +121,10 @@
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <64 x i32> poison, i32 [[INDEX]], i32 0
 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <64 x i32> [[BROADCAST_SPLATINSERT]], <64 x i32> poison, <64 x i32> zeroinitializer
 ; CHECK-NEXT: [[INDUCTION:%.*]] = add <64 x i32> [[BROADCAST_SPLAT]],
-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <64 x i32> [[INDUCTION]],
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 [[TMP0]]
 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 0
@@ -162,10 +162,10 @@
 ; AUTOVF-NEXT: br label [[VECTOR_BODY:%.*]]
 ; AUTOVF: vector.body:
 ; AUTOVF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; AUTOVF-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
 ; AUTOVF-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <32 x i32> poison, i32 [[INDEX]], i32 0
 ; AUTOVF-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <32 x i32> [[BROADCAST_SPLATINSERT]], <32 x i32> poison, <32 x i32> zeroinitializer
 ; AUTOVF-NEXT: [[INDUCTION:%.*]] = add <32 x i32> [[BROADCAST_SPLAT]],
-; AUTOVF-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
 ; AUTOVF-NEXT: [[TMP1:%.*]] = icmp ule <32 x i32> [[INDUCTION]],
 ; AUTOVF-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 [[TMP0]]
 ; AUTOVF-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 0
@@ -177,7 +177,7 @@
 ; AUTOVF-NEXT: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> [[TMP6]], <32 x i8>* [[TMP7]], i32 1, <32 x i1> [[TMP1]])
 ; AUTOVF-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 32
 ; AUTOVF-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 224
-; AUTOVF-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]]
+; AUTOVF-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; AUTOVF: middle.block:
 ; AUTOVF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; AUTOVF: scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr34438.ll b/llvm/test/Transforms/LoopVectorize/X86/pr34438.ll
--- a/llvm/test/Transforms/LoopVectorize/X86/pr34438.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr34438.ll
@@ -17,9 +17,6 @@
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]],
 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[TMP0]]
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[TMP1]], i32 0
diff --git a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
--- a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
@@ -154,7 +154,7 @@
 ; CHECK-NEXT: [[TMP20:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT19]]
 ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP20]], i64 0
 ; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_STORE_IF29:%.*]], label [[PRED_STORE_CONTINUE30:%.*]]
-; CHECK: pred.store.if29:
+; CHECK: pred.store.if24:
 ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[OFFSET_IDX]]
 ; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4
 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[OFFSET_IDX]]
@@ -163,10 +163,10 @@
 ; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[OFFSET_IDX]]
 ; CHECK-NEXT: store i32 [[TMP26]], i32* [[TMP27]], align 4
 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE30]]
-; CHECK: pred.store.continue30:
+; CHECK: pred.store.continue25:
 ; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP20]], i64 1
 ; CHECK-NEXT: br i1 [[TMP28]], label [[PRED_STORE_IF31:%.*]], label [[PRED_STORE_CONTINUE32:%.*]]
-; CHECK: pred.store.if31:
+; CHECK: pred.store.if26:
 ; CHECK-NEXT: [[TMP29:%.*]] = add i64 [[OFFSET_IDX]], 1
 ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[TMP29]]
 ; CHECK-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4
@@ -176,10 +176,10 @@
 ; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[TMP29]]
 ; CHECK-NEXT: store i32 [[TMP34]], i32* [[TMP35]], align 4
 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE32]]
-; CHECK: pred.store.continue32:
+; CHECK: pred.store.continue27:
 ; CHECK-NEXT: [[TMP36:%.*]] = extractelement <4 x i1> [[TMP20]], i64 2
 ; CHECK-NEXT: br i1 [[TMP36]], label [[PRED_STORE_IF33:%.*]], label [[PRED_STORE_CONTINUE34:%.*]]
-; CHECK: pred.store.if33:
+; CHECK: pred.store.if28:
 ; CHECK-NEXT: [[TMP37:%.*]] = add i64 [[OFFSET_IDX]], 2
 ; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[TMP37]]
 ; CHECK-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4
@@ -189,10 +189,10 @@
 ; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[TMP37]]
 ; CHECK-NEXT: store i32 [[TMP42]], i32* [[TMP43]], align 4
 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE34]]
-; CHECK: pred.store.continue34:
+; CHECK: pred.store.continue29:
 ; CHECK-NEXT: [[TMP44:%.*]] = extractelement <4 x i1> [[TMP20]], i64 3
 ; CHECK-NEXT: br i1 [[TMP44]], label [[PRED_STORE_IF35:%.*]], label [[PRED_STORE_CONTINUE36]]
-; CHECK: pred.store.if35:
+; CHECK: pred.store.if30:
 ; CHECK-NEXT: [[TMP45:%.*]] = add i64 [[OFFSET_IDX]], 3
 ; CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[TMP45]]
 ; CHECK-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4
@@ -202,7 +202,7 @@
 ; CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[TMP45]]
 ; CHECK-NEXT: store i32 [[TMP50]], i32* [[TMP51]], align 4
 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE36]]
-; CHECK: pred.store.continue36:
+; CHECK: pred.store.continue31:
 ; CHECK-NEXT: [[INDEX_NEXT37]] = add i64 [[INDEX38]], 4
 ; CHECK-NEXT: [[TMP52:%.*]] = icmp eq i64 [[INDEX_NEXT37]], [[N_VEC13]]
 ; CHECK-NEXT: br i1 [[TMP52]], label [[MIDDLE_BLOCK7:%.*]], label [[VECTOR_BODY9]], !llvm.loop [[LOOP5:![0-9]+]]
@@ -292,7 +292,7 @@
 ; CHECK: pred.store.continue:
 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP4]], i64 1
 ; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF16:%.*]], label [[PRED_STORE_CONTINUE17:%.*]]
-; CHECK: pred.store.if16:
+; CHECK: pred.store.if14:
 ; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[INDEX]], 1
 ; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP8]]
 ; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[INDEX]], 1
@@ -300,10 +300,10 @@
 ; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[NEXT_GEP11]], align 16
 ; CHECK-NEXT: store i32 [[TMP10]], i32* [[NEXT_GEP7]], align 16
 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE17]]
-; CHECK: pred.store.continue17:
+; CHECK: pred.store.continue15:
 ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP4]], i64 2
 ; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF18:%.*]], label [[PRED_STORE_CONTINUE19:%.*]]
-; CHECK: pred.store.if18:
+; CHECK: pred.store.if16:
 ; CHECK-NEXT: [[TMP12:%.*]] = or i64 [[INDEX]], 2
 ; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP12]]
 ; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[INDEX]], 2
@@ -311,10 +311,10 @@
 ; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[NEXT_GEP12]], align 16
 ; CHECK-NEXT: store i32 [[TMP14]], i32* [[NEXT_GEP8]], align 16
 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE19]]
-; CHECK: pred.store.continue19:
+; CHECK: pred.store.continue17:
 ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP4]], i64 3
 ; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF20:%.*]], label [[PRED_STORE_CONTINUE21]]
-; CHECK: pred.store.if20:
+; CHECK: pred.store.if18:
 ; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[INDEX]], 3
 ; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP16]]
 ; CHECK-NEXT: [[TMP17:%.*]] = or i64 [[INDEX]], 3
@@ -322,7 +322,7 @@
 ; CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* [[NEXT_GEP13]], align 16
 ; CHECK-NEXT: store i32 [[TMP18]], i32* [[NEXT_GEP9]], align 16
 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE21]]
-; CHECK: pred.store.continue21:
+; CHECK: pred.store.continue19:
 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
 ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll b/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll
--- a/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll
@@ -13,10 +13,10 @@
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i32 0
 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]],
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[INDUCTION]],
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]]
 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 0
@@ -84,10 +84,10 @@
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i32 0
 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]],
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[INDUCTION]],
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]]
 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 0
@@ -173,10 +173,10 @@
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i32 0
 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT1]], <8 x i64> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT2]],
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <8 x i64> [[INDUCTION]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP3]]
 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0
diff --git a/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll b/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll
--- a/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll
@@ -94,10 +94,10 @@
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i32 0
 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]],
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[INDUCTION]],
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[TMP0]]
 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 0
@@ -166,9 +166,6 @@
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]],
 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[TMP0]]
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[TMP1]], i32 0
diff --git a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll
--- a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll
+++ b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll
@@ -14,11 +14,6 @@
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]],
-; CHECK-NEXT: [[INDUCTION1:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]],
-; CHECK-NEXT: [[INDUCTION2:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]],
 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 2
 ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 4
diff --git a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll
--- a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll
+++ b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll
@@ -18,9 +18,6 @@
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]],
 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP0]]
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0
@@ -87,9 +84,6 @@
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]],
 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP0]]
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
@@ -172,7 +172,8 @@
 ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next>
 ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next
 ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%and.red> = phi ir<1234>, ir<%and.red.next>
-; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]>
+; CHECK-NEXT: EMIT vp<[[STEPVEC:%.+]]> = step-vector vp<[[CAN_IV]]> ir<1>
+; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule vp<[[STEPVEC]]> vp<[[BTC]]>
 ; CHECK-NEXT: Successor(s): loop.0
 ; CHECK-EMPTY:
 ; CHECK-NEXT: loop.0:
diff --git a/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll b/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll
--- a/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll
@@ -18,9 +18,6 @@
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_SREM_CONTINUE4:%.*]] ]
 ; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[INDEX]] to i16
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i16 99, [[TMP0]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[OFFSET_IDX]], i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i16> [[BROADCAST_SPLAT]],
 ; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[OFFSET_IDX]], 0
 ; CHECK-NEXT: [[TMP2:%.*]] = load i16, i16* @v_38, align 1
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i32 0
@@ -38,11 +35,11 @@
 ; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i16> [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_SREM_IF]] ]
 ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1
 ; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_SREM_IF3:%.*]], label [[PRED_SREM_CONTINUE4]]
-; CHECK: pred.srem.if3:
+; CHECK: pred.srem.if1:
 ; CHECK-NEXT: [[TMP11:%.*]] = srem i16 5786, [[TMP2]]
 ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i16> [[TMP9]], i16 [[TMP11]], i32 1
 ; CHECK-NEXT: br label [[PRED_SREM_CONTINUE4]]
-; CHECK: pred.srem.continue4:
+; CHECK: pred.srem.continue2:
 ; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x i16> [ [[TMP9]], [[PRED_SREM_CONTINUE]] ], [ [[TMP12]], [[PRED_SREM_IF3]] ]
 ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i16> , <2 x i16> [[TMP13]]
 ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i16> [[PREDPHI]], i32 0
diff --git a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll
--- a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll
@@ -31,23 +31,11 @@
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE12:%.*]] ]
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], [[INC]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[INC]], i32 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i64> , [[DOTSPLAT]]
-; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i64> [[BROADCAST_SPLAT2]], [[TMP3]]
 ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 0, [[INC]]
 ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], [[TMP4]]
 ; CHECK-NEXT: [[OFFSET_IDX3:%.*]] = mul i64 [[INDEX]], [[INC]]
 ; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[OFFSET_IDX3]] to i8
 ; CHECK-NEXT: [[TMP7:%.*]] = trunc i64 [[INC]] to i8
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <2 x i8> poison, i8 [[TMP6]], i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <2 x i8> [[BROADCAST_SPLATINSERT4]], <2 x i8> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[DOTSPLATINSERT6:%.*]] = insertelement <2 x i8> poison, i8 [[TMP7]], i32 0
-; CHECK-NEXT: [[DOTSPLAT7:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT6]], <2 x i8> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP8:%.*]] = mul <2 x i8> , [[DOTSPLAT7]]
-; CHECK-NEXT: [[INDUCTION8:%.*]] = add <2 x i8> [[BROADCAST_SPLAT5]], [[TMP8]]
 ; CHECK-NEXT: [[TMP9:%.*]] = mul i8 0, [[TMP7]]
 ; CHECK-NEXT: [[TMP10:%.*]] = add i8 [[TMP6]], [[TMP9]]
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0
@@ -62,10 +50,10 @@
 ; CHECK: pred.store.continue:
 ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP11]], i32 1
 ; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12]]
-; CHECK: pred.store.if11:
+; CHECK: pred.store.if4:
 ; CHECK-NEXT: store i32 0, i32* [[PTR]], align 4
 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]]
-; CHECK: pred.store.continue12:
+; CHECK: pred.store.continue5:
 ; CHECK-NEXT: [[TMP14:%.*]] = add i8 [[TMP10]], 1
 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
 ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
diff --git a/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll b/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll
--- a/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll
@@ -19,9 +19,6 @@
 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[PRED_LOAD_CONTINUE4]] ]
 ; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[INDEX]] to i16
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i16 41, [[TMP0]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[OFFSET_IDX]], i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i16> [[BROADCAST_SPLAT]],
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0
 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT1]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT: [[VEC_IV:%.*]] = add <2 x i32> [[BROADCAST_SPLAT2]],
@@ -43,7 +40,7 @@
 ; CHECK-NEXT: [[TMP12:%.*]] = phi <2 x i16> [ poison, [[VECTOR_BODY]] ], [ [[TMP10]], [[PRED_LOAD_IF]] ]
 ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
 ; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4]]
-; CHECK: pred.load.if3:
+; CHECK: pred.load.if1:
 ; CHECK-NEXT: [[TMP14:%.*]] = add i16 [[OFFSET_IDX]], -1
 ; CHECK-NEXT: [[TMP15:%.*]] = add nsw i16 [[TMP14]], -1
 ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [40 x [4 x i16]], [40 x [4 x i16]]* @A, i16 0, i16 [[TMP15]], i16 0
@@ -53,7 +50,7 @@
 ; CHECK-NEXT: [[TMP20:%.*]] = load i16, i16* [[TMP19]], align 1
 ; CHECK-NEXT: [[TMP21:%.*]] = insertelement <2 x i16> [[TMP12]], i16 [[TMP20]], i32 1
 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]]
-; CHECK: pred.load.continue4:
+; CHECK: pred.load.continue2:
 ; CHECK-NEXT: [[TMP22:%.*]] = phi <2 x i16> [ [[TMP11]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP18]], [[PRED_LOAD_IF3]] ]
 ; CHECK-NEXT: [[TMP23:%.*]] = phi <2 x i16> [ [[TMP12]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP21]], [[PRED_LOAD_IF3]] ]
 ; CHECK-NEXT: [[TMP24:%.*]] = add nsw <2 x i16> [[TMP22]], [[TMP23]]
diff --git a/llvm/test/Transforms/LoopVectorize/select-reduction.ll b/llvm/test/Transforms/LoopVectorize/select-reduction.ll
--- a/llvm/test/Transforms/LoopVectorize/select-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/select-reduction.ll
@@ -26,9 +26,6 @@
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[EXTRA_ITER]], [[INDEX]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[OFFSET_IDX]], i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT2]],
 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i32 0
 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT3]], <4 x i64> poison, <4 x i32> zeroinitializer
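
Note on the new step-vector lowering (an illustrative sketch, not part of the
patch itself): with VF = 4, UF = 1, an i64 canonical IV %index and a constant
step of 1, the VPInstruction::StepVector introduced above is expected to expand
to IR of roughly this shape:

; hypothetical expansion of a step-vector for <4 x i64>
%broadcast.splatinsert = insertelement <4 x i64> poison, i64 %index, i32 0
%broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, <4 x i32> zeroinitializer
%induction = add <4 x i64> %broadcast.splat, <i64 0, i64 1, i64 2, i64 3>

This is the same insertelement/shufflevector/add pattern that the updated tests
above now only expect where a widened canonical IV is actually needed.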