diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2361,7 +2361,6 @@ // Determine the number of scalars we need to generate for each unroll // iteration. bool FirstLaneOnly = vputils::onlyFirstLaneUsed(Def); - unsigned Lanes = FirstLaneOnly ? 1 : State.VF.getKnownMinValue(); // Compute the scalar steps and save the results in State. Type *IntStepTy = IntegerType::get(ScalarIVTy->getContext(), ScalarIVTy->getScalarSizeInBits()); @@ -2375,7 +2374,17 @@ SplatIV = Builder.CreateVectorSplat(State.VF, ScalarIV); } - for (unsigned Part = 0; Part < State.UF; ++Part) { + unsigned StartPart = 0; + unsigned EndPart = State.UF; + unsigned StartLane = 0; + unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue(); + if (State.Instance) { + StartPart = State.Instance->Part; + EndPart = StartPart + 1; + StartLane = State.Instance->Lane.getKnownLane(); + EndLane = StartLane + 1; + } + for (unsigned Part = StartPart; Part < EndPart; ++Part) { Value *StartIdx0 = createStepForVF(Builder, IntStepTy, State.VF, Part); if (!FirstLaneOnly && State.VF.isScalable()) { @@ -2394,7 +2403,7 @@ if (ScalarIVTy->isFloatingPointTy()) StartIdx0 = Builder.CreateSIToFP(StartIdx0, ScalarIVTy); - for (unsigned Lane = 0; Lane < Lanes; ++Lane) { + for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) { Value *StartIdx = Builder.CreateBinOp( AddOp, StartIdx0, getSignedIntOrFpConstant(ScalarIVTy, Lane)); // The step returned by `createStepForVF` is a runtime-evaluated value @@ -9557,8 +9566,6 @@ } void VPScalarIVStepsRecipe::execute(VPTransformState &State) { - assert(!State.Instance && "VPScalarIVStepsRecipe being replicated."); - // Fast-math-flags propagate from the original induction instruction. IRBuilder<>::FastMathFlagGuard FMFG(State.Builder); if (IndDesc.getInductionBinOp() && diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -49,6 +49,7 @@ return cast(getVPSingleValue()->getUnderlyingValue()) ->mayWriteToMemory(); case VPBranchOnMaskSC: + case VPScalarIVStepsSC: return false; case VPWidenIntOrFpInductionSC: case VPWidenCanonicalIVSC: @@ -80,6 +81,7 @@ return cast(getVPSingleValue()->getUnderlyingValue()) ->mayReadFromMemory(); case VPBranchOnMaskSC: + case VPScalarIVStepsSC: return false; case VPWidenIntOrFpInductionSC: case VPWidenCanonicalIVSC: diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -122,17 +122,23 @@ } } - // Try to sink each replicate recipe in the worklist. + // Try to sink each replicate or scalar IV steps recipe in the worklist. while (!WorkList.empty()) { VPBasicBlock *SinkTo; - VPValue *C; - std::tie(SinkTo, C) = WorkList.pop_back_val(); - auto *SinkCandidate = dyn_cast_or_null(C->Def); - if (!SinkCandidate || SinkCandidate->isUniform() || - SinkCandidate->getParent() == SinkTo || - SinkCandidate->mayHaveSideEffects() || + VPValue *SinkCandidateValue; + std::tie(SinkTo, SinkCandidateValue) = WorkList.pop_back_val(); + VPRecipeBase *SinkCandidate = SinkCandidateValue->getDefiningRecipe(); + if (!SinkCandidate || SinkCandidate->mayHaveSideEffects() || SinkCandidate->mayReadOrWriteMemory()) continue; + if (auto *RepR = dyn_cast(SinkCandidate)) { + if (RepR->isUniform()) + continue; + } else if (!isa(SinkCandidate)) + continue; + + if (SinkCandidate->getParent() == SinkTo) + continue; bool NeedsDuplicating = false; // All recipe users of the sink candidate must be in the same block SinkTo @@ -140,33 +146,37 @@ // i.e., only first lane is used) . In the latter case, we need to duplicate // SinkCandidate. auto CanSinkWithUser = [SinkTo, &NeedsDuplicating, - SinkCandidate](VPUser *U) { + SinkCandidateValue](VPUser *U) { auto *UI = dyn_cast(U); if (!UI) return false; if (UI->getParent() == SinkTo) return true; - NeedsDuplicating = UI->onlyFirstLaneUsed(SinkCandidate); + NeedsDuplicating = UI->onlyFirstLaneUsed(SinkCandidateValue); return NeedsDuplicating; }; - if (!all_of(SinkCandidate->users(), CanSinkWithUser)) + if (!all_of(SinkCandidateValue->users(), CanSinkWithUser)) continue; if (NeedsDuplicating) { - Instruction *I = cast(SinkCandidate->getUnderlyingValue()); + // We only know how to duplicate VPReplicateRecipes for now. + if (!isa(SinkCandidate)) + continue; + Instruction *I = cast( + cast(SinkCandidate)->getUnderlyingValue()); auto *Clone = new VPReplicateRecipe(I, SinkCandidate->operands(), true, false); // TODO: add ".cloned" suffix to name of Clone's VPValue. Clone->insertBefore(SinkCandidate); - SmallVector Users(SinkCandidate->users()); + SmallVector Users(SinkCandidateValue->users()); for (auto *U : Users) { auto *UI = cast(U); if (UI->getParent() == SinkTo) continue; for (unsigned Idx = 0; Idx != UI->getNumOperands(); Idx++) { - if (UI->getOperand(Idx) != SinkCandidate) + if (UI->getOperand(Idx) != SinkCandidateValue) continue; UI->setOperand(Idx, Clone); } diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -117,7 +117,6 @@ ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1> -; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: Successor(s): loop.0 ; CHECK-EMPTY: @@ -137,6 +136,7 @@ ; CHECK-NEXT: pred.store.if: ; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x> ; CHECK-NEXT: REPLICATE ir<%add> = add ir<%rem>, ir<%recur.next> +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE store ir<%add>, ir<%gep> ; CHECK-NEXT: Successor(s): pred.store.continue @@ -376,7 +376,6 @@ ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1> -; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: Successor(s): loop.0 ; CHECK-EMPTY: @@ -413,6 +412,7 @@ ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.if: ; CHECK-NEXT: REPLICATE ir<%rem.div> = sdiv ir<20>, vp<[[PRED]]> +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE store ir<%rem.div>, ir<%gep> ; CHECK-NEXT: Successor(s): pred.store.continue @@ -468,7 +468,6 @@ ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%.pn> = phi ir<0>, ir<[[L:%.+]]> ; CHECK-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<2> + vp<[[CAN_IV]]> * ir<1> -; CHECK-NEXT: vp<[[SCALAR_STEPS:%.+]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<1> ; CHECK-NEXT: EMIT vp<[[WIDE_IV:%.+]]> = WIDEN-CANONICAL-INDUCTION vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp ule vp<[[WIDE_IV]]> vp<[[BTC]]> ; CHECK-NEXT: Successor(s): loop.0 @@ -488,6 +487,7 @@ ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.if: ; CHECK-NEXT: REPLICATE ir<%val> = sdiv vp<[[SPLICE]]>, ir<%x> +; CHECK-NEXT: vp<[[SCALAR_STEPS:%.+]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<1> ; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr ir<%dst>, vp<[[SCALAR_STEPS]]> ; CHECK-NEXT: REPLICATE store ir<%val>, ir<%gep.dst> ; CHECK-NEXT: Successor(s): pred.store.continue diff --git a/llvm/test/Transforms/LoopVectorize/float-induction.ll b/llvm/test/Transforms/LoopVectorize/float-induction.ll --- a/llvm/test/Transforms/LoopVectorize/float-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/float-induction.ll @@ -1408,8 +1408,8 @@ ; VEC4_INTERL1-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP3]], i64 1 ; VEC4_INTERL1-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3:%.*]] ; VEC4_INTERL1: pred.store.if3: -; VEC4_INTERL1-NEXT: [[TMP7:%.*]] = fadd fast float [[TMP0]], 1.000000e+00 ; VEC4_INTERL1-NEXT: [[TMP8:%.*]] = or i64 [[INDEX]], 1 +; VEC4_INTERL1-NEXT: [[TMP7:%.*]] = fadd fast float [[TMP0]], 1.000000e+00 ; VEC4_INTERL1-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP8]] ; VEC4_INTERL1-NEXT: store float [[TMP7]], float* [[TMP9]], align 4 ; VEC4_INTERL1-NEXT: br label [[PRED_STORE_CONTINUE3]] @@ -1417,8 +1417,8 @@ ; VEC4_INTERL1-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP3]], i64 2 ; VEC4_INTERL1-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]] ; VEC4_INTERL1: pred.store.if5: -; VEC4_INTERL1-NEXT: [[TMP11:%.*]] = fadd fast float [[TMP0]], 2.000000e+00 ; VEC4_INTERL1-NEXT: [[TMP12:%.*]] = or i64 [[INDEX]], 2 +; VEC4_INTERL1-NEXT: [[TMP11:%.*]] = fadd fast float [[TMP0]], 2.000000e+00 ; VEC4_INTERL1-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP12]] ; VEC4_INTERL1-NEXT: store float [[TMP11]], float* [[TMP13]], align 4 ; VEC4_INTERL1-NEXT: br label [[PRED_STORE_CONTINUE5]] @@ -1426,8 +1426,8 @@ ; VEC4_INTERL1-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i64 3 ; VEC4_INTERL1-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7]] ; VEC4_INTERL1: pred.store.if7: -; VEC4_INTERL1-NEXT: [[TMP15:%.*]] = fadd fast float [[TMP0]], 3.000000e+00 ; VEC4_INTERL1-NEXT: [[TMP16:%.*]] = or i64 [[INDEX]], 3 +; VEC4_INTERL1-NEXT: [[TMP15:%.*]] = fadd fast float [[TMP0]], 3.000000e+00 ; VEC4_INTERL1-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP16]] ; VEC4_INTERL1-NEXT: store float [[TMP15]], float* [[TMP17]], align 4 ; VEC4_INTERL1-NEXT: br label [[PRED_STORE_CONTINUE7]] @@ -1491,8 +1491,8 @@ ; VEC4_INTERL2-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP6]], i64 1 ; VEC4_INTERL2-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] ; VEC4_INTERL2: pred.store.if4: -; VEC4_INTERL2-NEXT: [[TMP11:%.*]] = fadd fast float [[TMP0]], 1.000000e+00 ; VEC4_INTERL2-NEXT: [[TMP12:%.*]] = or i64 [[INDEX]], 1 +; VEC4_INTERL2-NEXT: [[TMP11:%.*]] = fadd fast float [[TMP0]], 1.000000e+00 ; VEC4_INTERL2-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP12]] ; VEC4_INTERL2-NEXT: store float [[TMP11]], float* [[TMP13]], align 4 ; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE4]] @@ -1500,8 +1500,8 @@ ; VEC4_INTERL2-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP6]], i64 2 ; VEC4_INTERL2-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]] ; VEC4_INTERL2: pred.store.if6: -; VEC4_INTERL2-NEXT: [[TMP15:%.*]] = fadd fast float [[TMP0]], 2.000000e+00 ; VEC4_INTERL2-NEXT: [[TMP16:%.*]] = or i64 [[INDEX]], 2 +; VEC4_INTERL2-NEXT: [[TMP15:%.*]] = fadd fast float [[TMP0]], 2.000000e+00 ; VEC4_INTERL2-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP16]] ; VEC4_INTERL2-NEXT: store float [[TMP15]], float* [[TMP17]], align 4 ; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE6]] @@ -1509,8 +1509,8 @@ ; VEC4_INTERL2-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP6]], i64 3 ; VEC4_INTERL2-NEXT: br i1 [[TMP18]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] ; VEC4_INTERL2: pred.store.if8: -; VEC4_INTERL2-NEXT: [[TMP19:%.*]] = fadd fast float [[TMP0]], 3.000000e+00 ; VEC4_INTERL2-NEXT: [[TMP20:%.*]] = or i64 [[INDEX]], 3 +; VEC4_INTERL2-NEXT: [[TMP19:%.*]] = fadd fast float [[TMP0]], 3.000000e+00 ; VEC4_INTERL2-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP20]] ; VEC4_INTERL2-NEXT: store float [[TMP19]], float* [[TMP21]], align 4 ; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE8]] @@ -1526,8 +1526,8 @@ ; VEC4_INTERL2-NEXT: [[TMP25:%.*]] = extractelement <4 x i1> [[TMP7]], i64 1 ; VEC4_INTERL2-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]] ; VEC4_INTERL2: pred.store.if12: -; VEC4_INTERL2-NEXT: [[TMP26:%.*]] = fadd fast float [[TMP0]], 5.000000e+00 ; VEC4_INTERL2-NEXT: [[TMP27:%.*]] = or i64 [[INDEX]], 5 +; VEC4_INTERL2-NEXT: [[TMP26:%.*]] = fadd fast float [[TMP0]], 5.000000e+00 ; VEC4_INTERL2-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP27]] ; VEC4_INTERL2-NEXT: store float [[TMP26]], float* [[TMP28]], align 4 ; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE12]] @@ -1535,8 +1535,8 @@ ; VEC4_INTERL2-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP7]], i64 2 ; VEC4_INTERL2-NEXT: br i1 [[TMP29]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14:%.*]] ; VEC4_INTERL2: pred.store.if14: -; VEC4_INTERL2-NEXT: [[TMP30:%.*]] = fadd fast float [[TMP0]], 6.000000e+00 ; VEC4_INTERL2-NEXT: [[TMP31:%.*]] = or i64 [[INDEX]], 6 +; VEC4_INTERL2-NEXT: [[TMP30:%.*]] = fadd fast float [[TMP0]], 6.000000e+00 ; VEC4_INTERL2-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP31]] ; VEC4_INTERL2-NEXT: store float [[TMP30]], float* [[TMP32]], align 4 ; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE14]] @@ -1544,8 +1544,8 @@ ; VEC4_INTERL2-NEXT: [[TMP33:%.*]] = extractelement <4 x i1> [[TMP7]], i64 3 ; VEC4_INTERL2-NEXT: br i1 [[TMP33]], label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16]] ; VEC4_INTERL2: pred.store.if16: -; VEC4_INTERL2-NEXT: [[TMP34:%.*]] = fadd fast float [[TMP0]], 7.000000e+00 ; VEC4_INTERL2-NEXT: [[TMP35:%.*]] = or i64 [[INDEX]], 7 +; VEC4_INTERL2-NEXT: [[TMP34:%.*]] = fadd fast float [[TMP0]], 7.000000e+00 ; VEC4_INTERL2-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP35]] ; VEC4_INTERL2-NEXT: store float [[TMP34]], float* [[TMP36]], align 4 ; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE16]] @@ -1662,8 +1662,8 @@ ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP3]], i64 1 ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]] ; VEC2_INTERL1_PRED_STORE: pred.store.if3: -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP7:%.*]] = fadd fast float [[TMP0]], 1.000000e+00 ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP8:%.*]] = or i64 [[INDEX]], 1 +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP7:%.*]] = fadd fast float [[TMP0]], 1.000000e+00 ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP8]] ; VEC2_INTERL1_PRED_STORE-NEXT: store float [[TMP7]], float* [[TMP9]], align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: br label [[PRED_STORE_CONTINUE3]] diff --git a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll --- a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll +++ b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll @@ -48,7 +48,6 @@ ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1> -; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: EMIT vp<[[COND:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: WIDEN ir<%cond0> = icmp ult ir<%iv>, ir<13> ; CHECK-NEXT: WIDEN-SELECT ir<%s> = select ir<%cond0>, ir<10>, ir<20> @@ -60,6 +59,7 @@ ; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.if: +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE store ir<%s>, ir<%gep> ; CHECK-NEXT: Successor(s): pred.store.continue diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -89,7 +89,6 @@ ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1> -; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: Successor(s): pred.load @@ -99,6 +98,7 @@ ; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue ; CHECK: pred.load.if: +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b> ; CHECK-NEXT: Successor(s): pred.load.continue @@ -168,7 +168,6 @@ ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1> -; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: Successor(s): pred.load @@ -178,6 +177,7 @@ ; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue ; CHECK: pred.load.if: +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b> (S->V) ; CHECK-NEXT: Successor(s): pred.load.continue @@ -250,7 +250,6 @@ ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 21, %iv.next, ir<1> ; CHECK-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<21> + vp<[[CAN_IV]]> * ir<1> -; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<1> ; CHECK-NEXT: EMIT vp<[[WIDE_CAN_IV:%.+]]> = WIDEN-CANONICAL-INDUCTION vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule vp<[[WIDE_CAN_IV]]> vp<[[BTC]]> ; CHECK-NEXT: CLONE ir<%gep.A.uniform> = getelementptr ir<%A>, ir<0> @@ -269,6 +268,7 @@ ; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.if: +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<1> ; CHECK-NEXT: REPLICATE ir<%gep.B> = getelementptr ir<%B>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE store ir<%lv>, ir<%gep.B> ; CHECK-NEXT: Successor(s): pred.store.continue @@ -325,7 +325,6 @@ ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1> -; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: EMIT vp<[[MASK1:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: WIDEN ir<%c.1> = icmp ult ir<%iv>, ir<%j> ; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%iv>, ir<10> @@ -341,6 +340,7 @@ ; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.load.if: +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b> (S->V) ; CHECK-NEXT: Successor(s): pred.load.continue @@ -426,7 +426,6 @@ ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1> -; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: EMIT vp<[[MASK1:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%iv>, ir<10> ; CHECK-NEXT: WIDEN ir<%c.0> = icmp ult ir<%iv>, ir<%j> @@ -443,6 +442,7 @@ ; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.load.if: +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b> (S->V) ; CHECK-NEXT: Successor(s): pred.load.continue @@ -542,7 +542,6 @@ ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1> -; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: EMIT vp<[[MASK1:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%iv>, ir<10> ; CHECK-NEXT: WIDEN ir<%c.0> = icmp ult ir<%iv>, ir<%j> @@ -558,6 +557,7 @@ ; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.load.if: +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b> ; CHECK-NEXT: Successor(s): pred.load.continue