diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1076,7 +1076,7 @@ return VF.isScalable() ? B.CreateVScale(EC) : EC; } -static Value *getRuntimeVFAsFloat(IRBuilder<> &B, Type *FTy, ElementCount VF) { +Value *getRuntimeVFAsFloat(IRBuilder<> &B, Type *FTy, ElementCount VF) { assert(FTy->isFloatingPointTy() && "Expected floating point type!"); Type *IntTy = IntegerType::get(FTy->getContext(), FTy->getScalarSizeInBits()); Value *RuntimeVF = getRuntimeVF(B, IntTy, VF); @@ -2327,26 +2327,15 @@ return Shuf; } +namespace llvm { /// This function adds /// (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step, ...) /// to each vector element of Val. The sequence starts at StartIndex. /// \p Opcode is relevant for FP induction variable. -static Value *getStepVector(Value *Val, Value *StartIdx, Value *Step, - Instruction::BinaryOps BinOp, ElementCount VF, - IRBuilder<> &Builder) { - if (VF.isScalar()) { - // When unrolling and the VF is 1, we only need to add a simple scalar. - Type *Ty = Val->getType(); - assert(!Ty->isVectorTy() && "Val must be a scalar"); - - if (Ty->isFloatingPointTy()) { - // Floating-point operations inherit FMF via the builder's flags. - Value *MulOp = Builder.CreateFMul(StartIdx, Step); - return Builder.CreateBinOp(BinOp, Val, MulOp); - } - return Builder.CreateAdd(Val, Builder.CreateMul(StartIdx, Step), - "induction"); - } +Value *getStepVector(Value *Val, Value *StartIdx, Value *Step, + Instruction::BinaryOps BinOp, ElementCount VF, + IRBuilder<> &Builder) { + assert(VF.isVector() && "should not be called with scalar VFs"); // Create and check the types. auto *ValVTy = cast(Val->getType()); @@ -2392,6 +2381,7 @@ Value *MulOp = Builder.CreateFMul(InitVec, Step); return Builder.CreateBinOp(BinOp, Val, MulOp, "induction"); } +} // namespace llvm void InnerLoopVectorizer::createVectorIntOrFpInductionPHI( const InductionDescriptor &II, Value *Step, Value *Start, @@ -2543,28 +2533,6 @@ return ScalarIV; }; - // Create the vector values from the scalar IV, in the absence of creating a - // vector IV. - auto CreateSplatIV = [&](Value *ScalarIV, Value *Step) { - Value *Broadcasted = getBroadcastInstrs(ScalarIV); - for (unsigned Part = 0; Part < UF; ++Part) { - assert(!State.VF.isScalable() && "scalable vectors not yet supported."); - Value *StartIdx; - if (Step->getType()->isFloatingPointTy()) - StartIdx = - getRuntimeVFAsFloat(Builder, Step->getType(), State.VF * Part); - else - StartIdx = getRuntimeVF(Builder, Step->getType(), State.VF * Part); - - Value *EntryPart = - getStepVector(Broadcasted, StartIdx, Step, ID.getInductionOpcode(), - State.VF, State.Builder); - State.set(Def, EntryPart, Part); - if (Trunc) - addMetadata(EntryPart, Trunc); - } - }; - // Fast-math-flags propagate from the original induction instruction. IRBuilder<>::FastMathFlagGuard FMFG(Builder); if (ID.getInductionBinOp() && isa(ID.getInductionBinOp())) @@ -2574,7 +2542,27 @@ Value *Step = CreateStepValue(ID.getStep()); if (State.VF.isZero() || State.VF.isScalar()) { Value *ScalarIV = CreateScalarIV(Step); - CreateSplatIV(ScalarIV, Step); + for (unsigned Part = 0; Part < UF; ++Part) { + assert(!State.VF.isScalable() && "scalable vectors not yet supported."); + Value *EntryPart; + if (Step->getType()->isFloatingPointTy()) { + Value *StartIdx = + getRuntimeVFAsFloat(Builder, Step->getType(), State.VF * Part); + // Floating-point operations inherit FMF via the builder's flags. + Value *MulOp = Builder.CreateFMul(StartIdx, Step); + EntryPart = + Builder.CreateBinOp(ID.getInductionOpcode(), ScalarIV, MulOp); + } else { + Value *StartIdx = + getRuntimeVF(Builder, Step->getType(), State.VF * Part); + EntryPart = Builder.CreateAdd( + ScalarIV, Builder.CreateMul(StartIdx, Step), "induction"); + } + State.set(Def, EntryPart, Part); + if (Trunc) + addMetadata(EntryPart, Trunc); + } + return; } @@ -2587,26 +2575,15 @@ return; } - // Try to create a new independent vector induction variable. If we can't - // create the phi node, we will splat the scalar induction variable in each - // loop iteration. - if (!shouldScalarizeInstruction(EntryVal)) { + // Try to create a new independent vector induction variable. + if (!shouldScalarizeInstruction(EntryVal)) createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, State); - Value *ScalarIV = CreateScalarIV(Step); - // Create scalar steps that can be used by instructions we will later - // scalarize. Note that the addition of the scalar steps will not increase - // the number of instructions in the loop in the common case prior to - // InstCombine. We will be trading one vector extract for each scalar step. - buildScalarSteps(ScalarIV, Step, EntryVal, ID, Def, State); - return; - } - // All IV users are scalar instructions, so only emit a scalar IV, not a - // vectorised IV. Except when we tail-fold, then the splat IV feeds the - // predicate used by the masked loads/stores. + // Create scalar steps that can be used by instructions we will later + // scalarize. Note that the addition of the scalar steps will not increase + // the number of instructions in the loop in the common case prior to + // InstCombine. We will be trading one vector extract for each scalar step. Value *ScalarIV = CreateScalarIV(Step); - if (!Cost->isScalarEpilogueAllowed()) - CreateSplatIV(ScalarIV, Step); buildScalarSteps(ScalarIV, Step, EntryVal, ID, Def, State); } @@ -8407,9 +8384,7 @@ // This is used instead of IV < TC because TC may wrap, unlike BTC. // Start by constructing the desired canonical IV in the header block. VPValue *IV = nullptr; - if (Legal->getPrimaryInduction()) - IV = Plan->getOrAddVPValue(Legal->getPrimaryInduction()); - else { + if (!Legal->getPrimaryInduction()) { VPBasicBlock *HeaderVPBB = Plan->getEntry()->getEntryBasicBlock(); auto *IVRecipe = new VPWidenCanonicalIVRecipe(); HeaderVPBB->insert(IVRecipe, HeaderVPBB->getFirstNonPhi()); @@ -8428,9 +8403,29 @@ // as a second argument, we only pass the IV here and extract the // tripcount from the transform state where codegen of the VP instructions // happen. + if (!IV) { + // Need scalar steps. + IV = Plan->getOrAddVPValue(Legal->getPrimaryInduction()); + } BlockMask = Builder.createNaryOp(VPInstruction::ActiveLaneMask, {IV}); } else { VPValue *BTC = Plan->getOrCreateBackedgeTakenCount(); + if (!IV) { + auto *CanIV = Plan->getCanonicalIV(); + VPValue *StepVector = VPUtils::getCanonicalStepVector(CanIV); + if (!StepVector) { + auto *One = + ConstantInt::get(Legal->getPrimaryInduction()->getType(), 1); + auto *OneVPV = new VPValue(One); + Plan->addExternalDef(OneVPV); + auto *R = + new VPInstruction(VPInstruction::StepVector, {CanIV, OneVPV}); + Builder.getInsertBlock()->insert(R, NewInsertionPoint); + StepVector = R; + } + IV = StepVector; + } + BlockMask = Builder.createNaryOp(VPInstruction::ICmpULE, {IV, BTC}); } return BlockMaskCache[BB] = BlockMask; @@ -8799,6 +8794,31 @@ return Region; } +bool VPRecipeBuilder::onlyScalarStepsNeeded(Instruction *Instr, + VFRange &Range) const { + auto ShouldScalarizeInstruction = [this](Instruction *I, ElementCount VF) { + return CM.isScalarAfterVectorization(I, VF) || + CM.isProfitableToScalarize(I, VF); + }; + + auto NeedsScalarInduction = [&](ElementCount VF) { + if (ShouldScalarizeInstruction(Instr, VF)) + return true; + auto IsScalarInst = [&](User *U) -> bool { + auto *I = cast(U); + return (OrigLoop->contains(I) && ShouldScalarizeInstruction(I, VF)); + }; + return any_of(Instr->users(), IsScalarInst); + }; + + return LoopVectorizationPlanner::getDecisionAndClampRange( + [&](ElementCount VF) { + return VF.isVector() && ShouldScalarizeInstruction(Instr, VF) && + NeedsScalarInduction(VF); + }, + Range); +} + VPRecipeOrVPValueTy VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr, ArrayRef Operands, @@ -9117,6 +9137,8 @@ "VPBasicBlock"); RecipeBuilder.fixHeaderPhis(); + // Replace StepVector recipes of the canonical induction by the widened vector + // induction, if it exists. // --------------------------------------------------------------------------- // Transform initial VPlan: Apply previously taken decisions, in order, to // bring the VPlan to its final state. @@ -9183,6 +9205,12 @@ } } + VPlanTransforms::removeRedundantStepVector( + *Plan, + [&RecipeBuilder, &Range](Instruction *I) { + return RecipeBuilder.onlyScalarStepsNeeded(I, Range); + }, + Legal->getPrimaryInduction()); VPlanTransforms::removeRedundantInductionCasts(*Plan); // Now that sink-after is done, move induction recipes for optimized truncates diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h --- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h +++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h @@ -174,6 +174,8 @@ /// Add the incoming values from the backedge to reduction & first-order /// recurrence cross-iteration phis. void fixHeaderPhis(); + + bool onlyScalarStepsNeeded(Instruction *I, VFRange &Range) const; }; } // end namespace llvm diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -72,6 +72,16 @@ /// Return a value for Step multiplied by VF. Value *createStepForVF(IRBuilder<> &B, Type *Ty, ElementCount VF, int64_t Step); +Value *getRuntimeVFAsFloat(IRBuilder<> &B, Type *FTy, ElementCount VF); + +/// This function adds +/// (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step, ...) +/// to each vector element of Val. The sequence starts at StartIndex. +/// \p Opcode is relevant for FP induction variable. +Value *getStepVector(Value *Val, Value *StartIdx, Value *Step, + Instruction::BinaryOps BinOp, ElementCount VF, + IRBuilder<> &Builder); + /// A range of powers-of-2 vectorization factors with fixed start and /// adjustable end. The range includes start and excludes end, e.g.,: /// [1, 9) = {1, 2, 4, 8} @@ -798,6 +808,7 @@ ActiveLaneMask, InductionIncrement, InductionIncrementNUW, + StepVector, }; private: @@ -838,6 +849,15 @@ return R->getVPDefID() == VPRecipeBase::VPInstructionSC; } + static inline bool classof(const VPUser *U) { + auto *R = dyn_cast(U); + return R && R->getVPDefID() == VPRecipeBase::VPInstructionSC; + } + + static inline bool classof(const VPRecipeBase *R) { + return R->getVPDefID() == VPRecipeBase::VPInstructionSC; + } + unsigned getOpcode() const { return Opcode; } /// Generate the instruction. @@ -2697,6 +2717,13 @@ /// Return true if all visited instruction can be combined. bool isCompletelySLP() const { return CompletelySLP; } }; + +struct VPUtils { + VPUtils() = delete; + + static VPInstruction *getCanonicalStepVector(VPCanonicalIVPHIRecipe *R); +}; + } // end namespace llvm #endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -729,6 +729,31 @@ break; } + case VPInstruction::StepVector: { + assert(!State.VF.isScalable() && "scalable vectors not yet supported."); + Value *Val = State.get(getOperand(0), VPIteration(0, 0)); + Value *Broadcasted = + State.Builder.CreateVectorSplat(State.VF, Val, "broadcast"); + + for (unsigned Part = 0; Part < State.UF; ++Part) { + assert(Val == State.get(getOperand(0), VPIteration(Part, 0)) && + "Val changed for Part"); + Value *Step = State.get(getOperand(1), VPIteration(Part, 0)); + Value *StartIdx; + if (Step->getType()->isFloatingPointTy()) + StartIdx = getRuntimeVFAsFloat(State.Builder, Step->getType(), + State.VF * Part); + else + StartIdx = + getRuntimeVF(State.Builder, Step->getType(), State.VF * Part); + + Value *EntryPart = + getStepVector(Broadcasted, StartIdx, Step, Instruction::Add, State.VF, + State.Builder); + State.set(this, EntryPart, Part); + } + break; + } default: llvm_unreachable("Unsupported opcode for instruction"); } @@ -782,6 +807,9 @@ case VPInstruction::InductionIncrementNUW: O << "induction increment nuw"; break; + case VPInstruction::StepVector: + O << "step-vector"; + break; default: O << Instruction::getOpcodeName(getOpcode()); } @@ -1579,3 +1607,13 @@ for (VPValue *Def : Recipe.definedValues()) assignSlot(Def); } + +VPInstruction * +VPUtils::getCanonicalStepVector(VPCanonicalIVPHIRecipe *CanonicalIV) { + for (VPUser *U : CanonicalIV->users()) { + if (auto *R = dyn_cast(U)) + if (R->getOpcode() == VPInstruction::StepVector) + return R; + } + return nullptr; +} diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -45,6 +45,10 @@ /// in the vectorized loop. There is no need to vectorize the cast - the same /// value can be used for both the phi and casts in the vector loop. static void removeRedundantInductionCasts(VPlan &Plan); + + static void removeRedundantStepVector( + VPlan &Plan, function_ref OnlyScalarStepsNeeded, + Value *OriginalCanonicalIV); }; } // namespace llvm diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -324,3 +324,27 @@ E.first->eraseFromParent(); } } + +void VPlanTransforms::removeRedundantStepVector( + VPlan &Plan, function_ref OnlyScalarStepsNeeded, + Value *OriginalCanonicalIV) { + auto *CanonicalStepVector = + VPUtils::getCanonicalStepVector(Plan.getCanonicalIV()); + if (!CanonicalStepVector) + return; + VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock(); + for (VPRecipeBase &Phi : HeaderVPBB->phis()) { + auto *IV = dyn_cast(&Phi); + if (!IV) + continue; + // If the induction recipe is for the primary induction and a vector phi + // will be generated use it directly instead of building a stepvector for + // the canonical IV. + if (IV->getUnderlyingValue() == OriginalCanonicalIV && + !OnlyScalarStepsNeeded(IV->getUnderlyingInstr())) { + CanonicalStepVector->replaceAllUsesWith(IV); + CanonicalStepVector->eraseFromParent(); + return; + } + } +} diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -330,6 +330,7 @@ VPWidenMemoryInstructionSC, VPWidenSC, VPWidenSelectSC, + VPStepVectorSC, // Phi-like recipes. Need to be kept together. VPBlendSC, diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll b/llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll @@ -11,10 +11,10 @@ ; CHECK-LABEL: @uniform_load( ; CHECK: vector.body: ; CHECK-NEXT: [[IDX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[IDX_NEXT:%.*]], %vector.body ] +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> poison, i64 [[IDX]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[TMP2]], -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[IDX]], 0 ; CHECK-NEXT: [[LOOP_PRED:%.*]] = icmp ule <4 x i64> [[INDUCTION]] ; CHECK-NEXT: [[LOAD_VAL:%.*]] = load i32, i32* %src, align 4 ; CHECK-NOT: load i32, i32* %src, align 4 diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll @@ -14,9 +14,6 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]]) ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw i32 [[TMP0]], 1 @@ -357,9 +354,6 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP1]], i32 [[N]]) ; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i32 [[TMP1]], [[STRIDE]] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll @@ -19,9 +19,6 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <16 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 [[TMP0]], i32 [[N]]) ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i32 [[TMP0]] @@ -107,9 +104,6 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <16 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 [[TMP0]], i32 [[N]]) ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i32 [[TMP0]] @@ -195,9 +189,6 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]]) ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[TMP0]] @@ -266,9 +257,6 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]]) ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[TMP0]] @@ -337,9 +325,6 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]]) ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[TMP0]] @@ -408,9 +393,6 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]]) ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[TMP0]] @@ -479,9 +461,6 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]]) ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[TMP0]] @@ -550,9 +529,6 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]]) ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i32 [[TMP0]] @@ -621,9 +597,6 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ , [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]]) ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i32 [[TMP0]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll --- a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll @@ -19,9 +19,6 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[OFFSET_IDX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i16> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[TMP0]] to i64 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [2 x i16*], [2 x i16*]* @b, i16 0, i64 [[TMP1]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/optsize.ll b/llvm/test/Transforms/LoopVectorize/X86/optsize.ll --- a/llvm/test/Transforms/LoopVectorize/X86/optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/optsize.ll @@ -17,10 +17,10 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <64 x i32> poison, i32 [[INDEX]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <64 x i32> [[BROADCAST_SPLATINSERT]], <64 x i32> poison, <64 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <64 x i32> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <64 x i32> [[INDUCTION]], ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 0 @@ -32,7 +32,7 @@ ; CHECK-NEXT: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> [[TMP6]], <64 x i8>* [[TMP7]], i32 1, <64 x i1> [[TMP1]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 64 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -58,10 +58,10 @@ ; AUTOVF-NEXT: br label [[VECTOR_BODY:%.*]] ; AUTOVF: vector.body: ; AUTOVF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; AUTOVF-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; AUTOVF-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <32 x i32> poison, i32 [[INDEX]], i32 0 ; AUTOVF-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <32 x i32> [[BROADCAST_SPLATINSERT]], <32 x i32> poison, <32 x i32> zeroinitializer ; AUTOVF-NEXT: [[INDUCTION:%.*]] = add <32 x i32> [[BROADCAST_SPLAT]], -; AUTOVF-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; AUTOVF-NEXT: [[TMP1:%.*]] = icmp ule <32 x i32> [[INDUCTION]], ; AUTOVF-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 [[TMP0]] ; AUTOVF-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 0 @@ -73,7 +73,7 @@ ; AUTOVF-NEXT: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> [[TMP6]], <32 x i8>* [[TMP7]], i32 1, <32 x i1> [[TMP1]]) ; AUTOVF-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 32 ; AUTOVF-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 224 -; AUTOVF-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] +; AUTOVF-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; AUTOVF: middle.block: ; AUTOVF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; AUTOVF: scalar.ph: @@ -121,10 +121,10 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <64 x i32> poison, i32 [[INDEX]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <64 x i32> [[BROADCAST_SPLATINSERT]], <64 x i32> poison, <64 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <64 x i32> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <64 x i32> [[INDUCTION]], ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 0 @@ -162,10 +162,10 @@ ; AUTOVF-NEXT: br label [[VECTOR_BODY:%.*]] ; AUTOVF: vector.body: ; AUTOVF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; AUTOVF-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; AUTOVF-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <32 x i32> poison, i32 [[INDEX]], i32 0 ; AUTOVF-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <32 x i32> [[BROADCAST_SPLATINSERT]], <32 x i32> poison, <32 x i32> zeroinitializer ; AUTOVF-NEXT: [[INDUCTION:%.*]] = add <32 x i32> [[BROADCAST_SPLAT]], -; AUTOVF-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; AUTOVF-NEXT: [[TMP1:%.*]] = icmp ule <32 x i32> [[INDUCTION]], ; AUTOVF-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 [[TMP0]] ; AUTOVF-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 0 @@ -177,7 +177,7 @@ ; AUTOVF-NEXT: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> [[TMP6]], <32 x i8>* [[TMP7]], i32 1, <32 x i1> [[TMP1]]) ; AUTOVF-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 32 ; AUTOVF-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 224 -; AUTOVF-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]] +; AUTOVF-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; AUTOVF: middle.block: ; AUTOVF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; AUTOVF: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr34438.ll b/llvm/test/Transforms/LoopVectorize/X86/pr34438.ll --- a/llvm/test/Transforms/LoopVectorize/X86/pr34438.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr34438.ll @@ -17,9 +17,6 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[TMP1]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll --- a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll @@ -154,7 +154,7 @@ ; CHECK-NEXT: [[TMP20:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT19]] ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP20]], i64 0 ; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_STORE_IF29:%.*]], label [[PRED_STORE_CONTINUE30:%.*]] -; CHECK: pred.store.if29: +; CHECK: pred.store.if24: ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[OFFSET_IDX]] @@ -163,10 +163,10 @@ ; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[OFFSET_IDX]] ; CHECK-NEXT: store i32 [[TMP26]], i32* [[TMP27]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE30]] -; CHECK: pred.store.continue30: +; CHECK: pred.store.continue25: ; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP20]], i64 1 ; CHECK-NEXT: br i1 [[TMP28]], label [[PRED_STORE_IF31:%.*]], label [[PRED_STORE_CONTINUE32:%.*]] -; CHECK: pred.store.if31: +; CHECK: pred.store.if26: ; CHECK-NEXT: [[TMP29:%.*]] = add i64 [[OFFSET_IDX]], 1 ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[TMP29]] ; CHECK-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 @@ -176,10 +176,10 @@ ; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[TMP29]] ; CHECK-NEXT: store i32 [[TMP34]], i32* [[TMP35]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE32]] -; CHECK: pred.store.continue32: +; CHECK: pred.store.continue27: ; CHECK-NEXT: [[TMP36:%.*]] = extractelement <4 x i1> [[TMP20]], i64 2 ; CHECK-NEXT: br i1 [[TMP36]], label [[PRED_STORE_IF33:%.*]], label [[PRED_STORE_CONTINUE34:%.*]] -; CHECK: pred.store.if33: +; CHECK: pred.store.if28: ; CHECK-NEXT: [[TMP37:%.*]] = add i64 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[TMP37]] ; CHECK-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 @@ -189,10 +189,10 @@ ; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[TMP37]] ; CHECK-NEXT: store i32 [[TMP42]], i32* [[TMP43]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE34]] -; CHECK: pred.store.continue34: +; CHECK: pred.store.continue29: ; CHECK-NEXT: [[TMP44:%.*]] = extractelement <4 x i1> [[TMP20]], i64 3 ; CHECK-NEXT: br i1 [[TMP44]], label [[PRED_STORE_IF35:%.*]], label [[PRED_STORE_CONTINUE36]] -; CHECK: pred.store.if35: +; CHECK: pred.store.if30: ; CHECK-NEXT: [[TMP45:%.*]] = add i64 [[OFFSET_IDX]], 3 ; CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[TMP45]] ; CHECK-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 @@ -202,7 +202,7 @@ ; CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[TMP45]] ; CHECK-NEXT: store i32 [[TMP50]], i32* [[TMP51]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE36]] -; CHECK: pred.store.continue36: +; CHECK: pred.store.continue31: ; CHECK-NEXT: [[INDEX_NEXT37]] = add i64 [[INDEX38]], 4 ; CHECK-NEXT: [[TMP52:%.*]] = icmp eq i64 [[INDEX_NEXT37]], [[N_VEC13]] ; CHECK-NEXT: br i1 [[TMP52]], label [[MIDDLE_BLOCK7:%.*]], label [[VECTOR_BODY9]], !llvm.loop [[LOOP5:![0-9]+]] @@ -292,7 +292,7 @@ ; CHECK: pred.store.continue: ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP4]], i64 1 ; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF16:%.*]], label [[PRED_STORE_CONTINUE17:%.*]] -; CHECK: pred.store.if16: +; CHECK: pred.store.if14: ; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[INDEX]], 1 ; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[INDEX]], 1 @@ -300,10 +300,10 @@ ; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[NEXT_GEP11]], align 16 ; CHECK-NEXT: store i32 [[TMP10]], i32* [[NEXT_GEP7]], align 16 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE17]] -; CHECK: pred.store.continue17: +; CHECK: pred.store.continue15: ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP4]], i64 2 ; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF18:%.*]], label [[PRED_STORE_CONTINUE19:%.*]] -; CHECK: pred.store.if18: +; CHECK: pred.store.if16: ; CHECK-NEXT: [[TMP12:%.*]] = or i64 [[INDEX]], 2 ; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP12]] ; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[INDEX]], 2 @@ -311,10 +311,10 @@ ; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[NEXT_GEP12]], align 16 ; CHECK-NEXT: store i32 [[TMP14]], i32* [[NEXT_GEP8]], align 16 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE19]] -; CHECK: pred.store.continue19: +; CHECK: pred.store.continue17: ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP4]], i64 3 ; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF20:%.*]], label [[PRED_STORE_CONTINUE21]] -; CHECK: pred.store.if20: +; CHECK: pred.store.if18: ; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[INDEX]], 3 ; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP16]] ; CHECK-NEXT: [[TMP17:%.*]] = or i64 [[INDEX]], 3 @@ -322,7 +322,7 @@ ; CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* [[NEXT_GEP13]], align 16 ; CHECK-NEXT: store i32 [[TMP18]], i32* [[NEXT_GEP9]], align 16 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE21]] -; CHECK: pred.store.continue21: +; CHECK: pred.store.continue19: ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll b/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll --- a/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll @@ -13,10 +13,10 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[INDUCTION]], ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 0 @@ -84,10 +84,10 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[INDUCTION]], ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 0 @@ -173,10 +173,10 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT1]], <8 x i64> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT2]], -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <8 x i64> [[INDUCTION]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll b/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll --- a/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll @@ -94,10 +94,10 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[INDUCTION]], ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 0 @@ -166,9 +166,6 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[TMP1]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll --- a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll +++ b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll @@ -14,11 +14,6 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[INDUCTION1:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[INDUCTION2:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll --- a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll +++ b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll @@ -18,9 +18,6 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0 @@ -87,9 +84,6 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -164,7 +164,8 @@ ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%and.red> = phi ir<1234>, ir<%and.red.next> -; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> +; CHECK-NEXT: EMIT vp<[[STEPVEC:%.+]]> = step-vector vp<[[CAN_IV]]> ir<1> +; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule vp<[[STEPVEC]]> vp<[[BTC]]> ; CHECK-NEXT: Successor(s): loop.0 ; CHECK-EMPTY: ; CHECK-NEXT: loop.0: diff --git a/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll b/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll --- a/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll @@ -18,9 +18,6 @@ ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_SREM_CONTINUE4:%.*]] ] ; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[INDEX]] to i16 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i16 99, [[TMP0]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[OFFSET_IDX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i16> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = load i16, i16* @v_38, align 1 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i32 0 @@ -38,11 +35,11 @@ ; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i16> [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_SREM_IF]] ] ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1 ; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_SREM_IF3:%.*]], label [[PRED_SREM_CONTINUE4]] -; CHECK: pred.srem.if3: +; CHECK: pred.srem.if1: ; CHECK-NEXT: [[TMP11:%.*]] = srem i16 5786, [[TMP2]] ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i16> [[TMP9]], i16 [[TMP11]], i32 1 ; CHECK-NEXT: br label [[PRED_SREM_CONTINUE4]] -; CHECK: pred.srem.continue4: +; CHECK: pred.srem.continue2: ; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x i16> [ [[TMP9]], [[PRED_SREM_CONTINUE]] ], [ [[TMP12]], [[PRED_SREM_IF3]] ] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i16> , <2 x i16> [[TMP13]] ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i16> [[PREDPHI]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll --- a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll +++ b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll @@ -31,12 +31,6 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE12:%.*]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], [[INC]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[INC]], i32 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i64> , [[DOTSPLAT]] -; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i64> [[BROADCAST_SPLAT2]], [[TMP3]] ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 0, [[INC]] ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], [[TMP4]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0 @@ -45,12 +39,6 @@ ; CHECK-NEXT: [[OFFSET_IDX5:%.*]] = mul i64 [[INDEX]], [[INC]] ; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[OFFSET_IDX5]] to i8 ; CHECK-NEXT: [[TMP7:%.*]] = trunc i64 [[INC]] to i8 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <2 x i8> poison, i8 [[TMP6]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <2 x i8> [[BROADCAST_SPLATINSERT6]], <2 x i8> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <2 x i8> poison, i8 [[TMP7]], i32 0 -; CHECK-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT8]], <2 x i8> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = mul <2 x i8> , [[DOTSPLAT9]] -; CHECK-NEXT: [[INDUCTION10:%.*]] = add <2 x i8> [[BROADCAST_SPLAT7]], [[TMP8]] ; CHECK-NEXT: [[TMP9:%.*]] = mul i8 0, [[TMP7]] ; CHECK-NEXT: [[TMP10:%.*]] = add i8 [[TMP6]], [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp ule <2 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]] @@ -62,10 +50,10 @@ ; CHECK: pred.store.continue: ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP11]], i32 1 ; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12]] -; CHECK: pred.store.if11: +; CHECK: pred.store.if4: ; CHECK-NEXT: store i32 0, i32* [[PTR]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]] -; CHECK: pred.store.continue12: +; CHECK: pred.store.continue5: ; CHECK-NEXT: [[TMP14:%.*]] = add i8 [[TMP10]], 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll b/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll --- a/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll +++ b/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll @@ -19,9 +19,6 @@ ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[PRED_LOAD_CONTINUE4]] ] ; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[INDEX]] to i16 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i16 41, [[TMP0]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[OFFSET_IDX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i16> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT1]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = add <2 x i32> [[BROADCAST_SPLAT2]], @@ -43,7 +40,7 @@ ; CHECK-NEXT: [[TMP12:%.*]] = phi <2 x i16> [ poison, [[VECTOR_BODY]] ], [ [[TMP10]], [[PRED_LOAD_IF]] ] ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1 ; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4]] -; CHECK: pred.load.if3: +; CHECK: pred.load.if1: ; CHECK-NEXT: [[TMP14:%.*]] = add i16 [[OFFSET_IDX]], -1 ; CHECK-NEXT: [[TMP15:%.*]] = add nsw i16 [[TMP14]], -1 ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [40 x [4 x i16]], [40 x [4 x i16]]* @A, i16 0, i16 [[TMP15]], i16 0 @@ -53,7 +50,7 @@ ; CHECK-NEXT: [[TMP20:%.*]] = load i16, i16* [[TMP19]], align 1 ; CHECK-NEXT: [[TMP21:%.*]] = insertelement <2 x i16> [[TMP12]], i16 [[TMP20]], i32 1 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] -; CHECK: pred.load.continue4: +; CHECK: pred.load.continue2: ; CHECK-NEXT: [[TMP22:%.*]] = phi <2 x i16> [ [[TMP11]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP18]], [[PRED_LOAD_IF3]] ] ; CHECK-NEXT: [[TMP23:%.*]] = phi <2 x i16> [ [[TMP12]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP21]], [[PRED_LOAD_IF3]] ] ; CHECK-NEXT: [[TMP24:%.*]] = add nsw <2 x i16> [[TMP22]], [[TMP23]] diff --git a/llvm/test/Transforms/LoopVectorize/reduction-order.ll b/llvm/test/Transforms/LoopVectorize/reduction-order.ll --- a/llvm/test/Transforms/LoopVectorize/reduction-order.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-order.ll @@ -9,7 +9,7 @@ ; CHECK: vector.body: ; CHECK: %[[VAR1:.*]] = add <4 x i32> , %vec.phi1 ; CHECK-NEXT: %[[VAR2:.*]] = add <4 x i32> %vec.phi, -; CHECK-NEXT: icmp ule <4 x i64> +; CHECK: icmp ule <4 x i64> ; CHECK: select <4 x i1> {{.*}}, <4 x i32> %[[VAR2]], <4 x i32> ; CHECK-NEXT: select <4 x i1> {{.*}}, <4 x i32> %[[VAR1]], <4 x i32> ; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body diff --git a/llvm/test/Transforms/LoopVectorize/select-reduction.ll b/llvm/test/Transforms/LoopVectorize/select-reduction.ll --- a/llvm/test/Transforms/LoopVectorize/select-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/select-reduction.ll @@ -26,9 +26,6 @@ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[EXTRA_ITER]], [[INDEX]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[OFFSET_IDX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT2]], ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT3]], <4 x i64> poison, <4 x i32> zeroinitializer