diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4131,10 +4131,9 @@ auto *PhiR = dyn_cast(&R); if (!PhiR) continue; - auto *OrigPhi = cast(PhiR->getUnderlyingValue()); - if (isa(PhiR->getDef())) { + if (isa(PhiR->getDef())) fixReduction(cast(PhiR->getDef()), State); - } else if (Legal->isFirstOrderRecurrence(OrigPhi)) + else if (isa(PhiR->getDef())) fixFirstOrderRecurrence(PhiR, State); } } @@ -4213,53 +4212,12 @@ // Create a phi node for the new recurrence. The current value will either be // the initial value inserted into a vector or loop-varying vector value. - auto *VecPhi = Builder.CreatePHI(VectorInit->getType(), 2, "vector.recur"); + auto *VecPhi = cast(State.get(PhiR, 0)); + VecPhi->setName("vector.recur"); VecPhi->addIncoming(VectorInit, LoopVectorPreHeader); - // Get the vectorized previous value of the last part UF - 1. It appears last - // among all unrolled iterations, due to the order of their construction. - Value *PreviousLastPart = State.get(PreviousDef, UF - 1); - - // Find and set the insertion point after the previous value if it is an - // instruction. - BasicBlock::iterator InsertPt; - // Note that the previous value may have been constant-folded so it is not - // guaranteed to be an instruction in the vector loop. - // FIXME: Loop invariant values do not form recurrences. We should deal with - // them earlier. - if (LI->getLoopFor(LoopVectorBody)->isLoopInvariant(PreviousLastPart)) - InsertPt = LoopVectorBody->getFirstInsertionPt(); - else { - Instruction *PreviousInst = cast(PreviousLastPart); - if (isa(PreviousLastPart)) - // If the previous value is a phi node, we should insert after all the phi - // nodes in the block containing the PHI to avoid breaking basic block - // verification. 
Note that the basic block may be different to - // LoopVectorBody, in case we predicate the loop. - InsertPt = PreviousInst->getParent()->getFirstInsertionPt(); - else - InsertPt = ++PreviousInst->getIterator(); - } - Builder.SetInsertPoint(&*InsertPt); - - // The vector from which to take the initial value for the current iteration - // (actual or unrolled). Initially, this is the vector phi node. - Value *Incoming = VecPhi; - - // Shuffle the current and previous vector and update the vector parts. - for (unsigned Part = 0; Part < UF; ++Part) { - Value *PreviousPart = State.get(PreviousDef, Part); - Value *PhiPart = State.get(PhiR, Part); - auto *Shuffle = VF.isVector() - ? Builder.CreateVectorSplice(Incoming, PreviousPart, -1) - : Incoming; - PhiPart->replaceAllUsesWith(Shuffle); - cast(PhiPart)->eraseFromParent(); - State.reset(PhiR, Shuffle, Part); - Incoming = PreviousPart; - } - // Fix the latch value of the new recurrence in the vector loop. + Value *Incoming = State.get(PreviousDef, UF - 1); VecPhi->addIncoming(Incoming, LI->getLoopFor(LoopVectorBody)->getLoopLatch()); // Extract the last vector element in the middle block. This will be the @@ -4752,20 +4710,6 @@ // Phi nodes have cycles, so we need to vectorize them in two stages. This is // stage #1: We create a new vector PHI node with no incoming edges. We'll use // this value when we vectorize all of the instructions that use the PHI. - if (Legal->isFirstOrderRecurrence(P)) { - bool ScalarPHI = - (State.VF.isScalar()) || Cost->isInLoopReduction(cast(PN)); - Type *VecTy = - ScalarPHI ? 
PN->getType() : VectorType::get(PN->getType(), State.VF); - - for (unsigned Part = 0; Part < State.UF; ++Part) { - Value *EntryPart = PHINode::Create( - VecTy, 2, "vec.phi", &*LoopVectorBody->getFirstInsertionPt()); - State.set(PhiR, EntryPart, Part); - } - if (Legal->isFirstOrderRecurrence(P)) - return; - } assert(!Legal->isReductionVariable(P) && "reductions should be handled above"); @@ -8921,7 +8865,7 @@ CM.isInLoopReduction(Phi), CM.useOrderedReductions(RdxDesc)); } else { - PhiRecipe = new VPWidenPHIRecipe(Phi, *StartV); + PhiRecipe = new VPFirstOrderRecurrencePHIRecipe(Phi, *StartV); } // Record the incoming value from the backedge, so we can add the incoming @@ -9144,23 +9088,22 @@ // --------------------------------------------------------------------------- // Apply Sink-After legal constraints. + auto GetReplicateRegion = [](VPRecipeBase *R) -> VPRegionBlock * { + auto *Region = dyn_cast_or_null(R->getParent()->getParent()); + if (Region && Region->isReplicator()) { + assert(Region->getNumSuccessors() == 1 && + Region->getNumPredecessors() == 1 && "Expected SESE region!"); + assert(R->getParent()->size() == 1 && + "A recipe in an original replicator region must be the only " + "recipe in its block"); + return Region; + } + return nullptr; + }; for (auto &Entry : SinkAfter) { VPRecipeBase *Sink = RecipeBuilder.getRecipe(Entry.first); VPRecipeBase *Target = RecipeBuilder.getRecipe(Entry.second); - auto GetReplicateRegion = [](VPRecipeBase *R) -> VPRegionBlock * { - auto *Region = - dyn_cast_or_null(R->getParent()->getParent()); - if (Region && Region->isReplicator()) { - assert(Region->getNumSuccessors() == 1 && - Region->getNumPredecessors() == 1 && "Expected SESE region!"); - assert(R->getParent()->size() == 1 && - "A recipe in an original replicator region must be the only " - "recipe in its block"); - return Region; - } - return nullptr; - }; auto *TargetRegion = GetReplicateRegion(Target); auto *SinkRegion = GetReplicateRegion(Sink); if (!SinkRegion) 
{ @@ -9192,8 +9135,8 @@ VPBlockUtils::connectBlocks(SinkRegion, TargetSucc); } else { // The sink source is in a replicate region, we need to move the whole - // replicate region, which should only contain a single recipe in the main - // block. + // replicate region, which should only contain a single recipe in the + // main block. auto *SplitBlock = Target->getParent()->splitAt(std::next(Target->getIterator())); @@ -9207,6 +9150,25 @@ } } + for (VPRecipeBase &R : Plan->getEntry()->getEntryBasicBlock()->phis()) { + auto *RecurP = dyn_cast(R.getVPValue()); + if (!RecurP) + continue; + + VPRecipeBase *Prev = + cast(RecurP->getBackedgeValue()->getDef()); + auto *RecurSplice = cast( + Builder.createNaryOp(VPInstruction::FirstOrderRecurrenceSplice, + {RecurP, Prev->getVPValue()})); + if (auto *Region = GetReplicateRegion(Prev)) { + VPBasicBlock *Succ = cast(Region->getSingleSuccessor()); + RecurSplice->moveBefore(*Succ, Succ->begin()); + } else + RecurSplice->moveAfter(Prev); + RecurP->replaceAllUsesWith(RecurSplice); + RecurSplice->setOperand(0, RecurP); + } + // Interleave memory: for each Interleave Group we marked earlier as relevant // for this VPlan, replace the Recipes widening its memory instructions with a // single VPInterleaveRecipe at its insertion point. diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -733,6 +733,7 @@ bool isPhi() const { return getVPDefID() == VPWidenIntOrFpInductionSC || getVPDefID() == VPWidenPHISC || getVPDefID() == VPReductionPHISC || + getVPDefID() == VPFirstOrderRecurrencePHISC || getVPDefID() == VPPredInstPHISC || getVPDefID() == VPWidenCanonicalIVSC; } @@ -778,6 +779,8 @@ SLPLoad, SLPStore, ActiveLaneMask, + FirstOrderRecurrenceSplice, // Combines the incoming and previous values of + // a first-order recurrence. 
}; private: @@ -1075,10 +1078,12 @@ /// Method to support type inquiry through isa, cast, and dyn_cast. static inline bool classof(const VPDef *D) { return D->getVPDefID() == VPRecipeBase::VPWidenPHISC || + D->getVPDefID() == VPRecipeBase::VPFirstOrderRecurrencePHISC || D->getVPDefID() == VPRecipeBase::VPReductionPHISC; } static inline bool classof(const VPValue *V) { return V->getVPValueID() == VPValue::VPVWidenPHISC || + V->getVPValueID() == VPValue::VPVFirstOrderRecurrencePHISC || V->getVPValueID() == VPValue::VPVReductionPHISC; } @@ -1116,6 +1121,34 @@ VPBasicBlock *getIncomingBlock(unsigned I) { return IncomingBlocks[I]; } }; +/// A recipe for handling first-order recurrence phis. The start value is the +/// first operand of the recipe and the incoming value from the backedge is the +/// second operand. +class VPFirstOrderRecurrencePHIRecipe : public VPWidenPHIRecipe { +public: + VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start) + : VPWidenPHIRecipe(VPVFirstOrderRecurrencePHISC, + VPFirstOrderRecurrencePHISC, Phi) { + addOperand(&Start); + } + + /// Method to support type inquiry through isa, cast, and dyn_cast. + static inline bool classof(const VPDef *D) { + return D->getVPDefID() == VPRecipeBase::VPFirstOrderRecurrencePHISC; + } + static inline bool classof(const VPValue *V) { + return V->getVPValueID() == VPValue::VPVFirstOrderRecurrencePHISC; + } + + void execute(VPTransformState &State) override; + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; +#endif +}; + /// A recipe for handling reduction phis. The start value is the first operand /// of the recipe and the incoming value from the backedge is the second /// operand. 
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -687,6 +687,18 @@ State.set(this, Call, Part); break; } + case VPInstruction::FirstOrderRecurrenceSplice: { + + Value *Incoming = Part == 0 ? State.get(getOperand(0), Part) + : State.get(getOperand(1), Part - 1); + if (!Incoming->getType()->isVectorTy()) { + State.set(this, Incoming, Part); + } else { + Value *V2 = State.get(getOperand(1), Part); + State.set(this, Builder.CreateVectorSplice(Incoming, V2, -1), Part); + } + break; + } default: llvm_unreachable("Unsupported opcode for instruction"); } @@ -729,7 +741,9 @@ case VPInstruction::ActiveLaneMask: O << "active lane mask"; break; - + case VPInstruction::FirstOrderRecurrenceSplice: + O << "first-order splice"; + break; default: O << Instruction::getOpcodeName(getOpcode()); } @@ -1114,6 +1128,35 @@ printOperands(O, SlotTracker); } +void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "FIRST-ORDER-RECURRENCE-PHI "; + + auto *OriginalPhi = cast(getUnderlyingValue()); + // Unless all incoming values are modeled in VPlan print the original PHI + // directly. + // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming + // values as VPValues. + if (getNumOperands() != OriginalPhi->getNumOperands()) { + O << VPlanIngredient(OriginalPhi); + return; + } + + printAsOperand(O, SlotTracker); + O << " = phi "; + printOperands(O, SlotTracker); +} + +void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) { + PHINode *PN = cast(getUnderlyingValue()); + Type *VecTy = State.VF.isScalar() ? 
PN->getType() + : VectorType::get(PN->getType(), State.VF); + + Value *EntryPart = PHINode::Create(VecTy, 2, "vec.phi", + &*State.CFG.PrevBB->getFirstInsertionPt()); + State.set(this, EntryPart, 0); +} + void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { O << Indent << "WIDEN-REDUCTION-PHI "; diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -267,27 +267,6 @@ // region. Such dependencies should be rejected during earlier dependence // checks, which guarantee accesses can be re-ordered for vectorization. // - // If a recipe is used by a first-order recurrence phi, we cannot move it at - // the moment: a recipe R feeding a first order recurrence phi must allow - // for a *vector* shuffle to be inserted immediately after it, and therefore - // if R is *scalarized and predicated* it must appear last in its basic - // block. In addition, other recipes may need to "sink after" R, so best if - // R not be moved at all. - auto IsImmovableRecipe = [](VPRecipeBase &R) { - assert(R.getNumDefinedValues() <= 1 && - "no multi-defs are expected in predicated blocks"); - for (VPUser *U : R.getVPValue()->users()) { - auto *UI = dyn_cast(U); - if (!UI) - continue; - if (isa(UI)) - return true; - } - return false; - }; - if (any_of(*Then1, IsImmovableRecipe)) - continue; - // Move recipes to the successor region. 
for (VPRecipeBase &ToMove : make_early_inc_range(reverse(*Then1))) ToMove.moveBefore(*Then2, Then2->getFirstNonPhi()); diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -102,6 +102,7 @@ VPVWidenIntOrFpIndcutionSC, VPVWidenPHISC, VPVReductionPHISC, + VPVFirstOrderRecurrencePHISC, VPVWidenSelectSC, }; @@ -329,6 +330,7 @@ VPWidenMemoryInstructionSC, VPWidenPHISC, VPReductionPHISC, + VPFirstOrderRecurrencePHISC, VPWidenSC, VPWidenSelectSC }; @@ -360,7 +362,7 @@ } /// Returns the VPValue with index \p I defined by the VPDef. - VPValue *getVPValue(unsigned I) { + VPValue *getVPValue(unsigned I = 0) { assert(DefinedValues[I] && "defined value must be non-null"); return DefinedValues[I]; } diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -10,7 +10,7 @@ ; CHECK-LABEL: sink_replicate_region_1 ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: loop: -; CHECK-NEXT: WIDEN-PHI ir<%0> = phi ir<0>, ir<%conv> +; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%0> = phi ir<0>, ir<%conv> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: EMIT vp<%3> = icmp ule ir<%iv> vp<%0> ; CHECK-NEXT: Successor(s): loop.0 @@ -37,6 +37,7 @@ ; CHECK-EMPTY: ; CHECK-NEXT: loop.1: ; CHECK-NEXT: WIDEN ir<%conv> = sext vp<%6> +; CHECK-NEXT: EMIT vp<%8> = first-order splice ir<%0> ir<%conv> ; CHECK-NEXT: Successor(s): pred.srem ; CHECK-EMPTY: ; CHECK-NEXT: pred.srem: { @@ -46,17 +47,17 @@ ; CHECK-NEXT: CondBit: vp<%3> (loop) ; CHECK-EMPTY: ; CHECK-NEXT: pred.srem.if: -; CHECK-NEXT: REPLICATE ir<%rem> = srem ir<%0>, ir<%x> 
(S->V) +; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<%8>, ir<%x> (S->V) ; CHECK-NEXT: Successor(s): pred.srem.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.srem.continue: -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%9> = ir<%rem> +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%10> = ir<%rem> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): loop.1.split ; CHECK-EMPTY: ; CHECK-NEXT: loop.1.split: -; CHECK-NEXT: WIDEN ir<%add> = add ir<%conv>, vp<%9> +; CHECK-NEXT: WIDEN ir<%add> = add ir<%conv>, vp<%10> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -83,13 +84,14 @@ ; CHECK-LABEL: sink_replicate_region_2 ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: loop: -; CHECK-NEXT: WIDEN-PHI ir<%recur> = phi ir<0>, ir<%recur.next> +; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: EMIT vp<%3> = icmp ule ir<%iv> vp<%0> ; CHECK-NEXT: Successor(s): loop.0 ; CHECK-EMPTY: ; CHECK-NEXT: loop.0: ; CHECK-NEXT: WIDEN ir<%recur.next> = sext ir<%y> +; CHECK-NEXT: EMIT vp<%5> = first-order splice ir<%recur> ir<%recur.next> ; CHECK-NEXT: Successor(s): loop.0.split ; CHECK-EMPTY: ; CHECK-NEXT: loop.0.split: @@ -102,14 +104,14 @@ ; CHECK-NEXT: CondBit: vp<%3> (loop) ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.if: -; CHECK-NEXT: REPLICATE ir<%rem> = srem ir<%recur>, ir<%x> +; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<%5>, ir<%x> ; CHECK-NEXT: REPLICATE ir<%add> = add ir<%rem>, ir<%recur.next> ; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, ir<%iv> ; CHECK-NEXT: REPLICATE store ir<%add>, ir<%gep> ; CHECK-NEXT: Successor(s): pred.store.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.continue: -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%9> = ir<%rem> +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%10> = ir<%rem> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): loop.1 @@ -141,7 +143,7 @@ ; CHECK-LABEL: sink_replicate_region_3_reduction ; 
CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: loop: -; CHECK-NEXT: WIDEN-PHI ir<%recur> = phi ir<0>, ir<%recur.next> +; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%and.red> = phi ir<1234>, ir<%and.red.next> ; CHECK-NEXT: EMIT vp<%4> = icmp ule ir<%iv> vp<%0> @@ -149,6 +151,7 @@ ; CHECK-EMPTY: ; CHECK-NEXT: loop.0: ; CHECK-NEXT: WIDEN ir<%recur.next> = sext ir<%y> +; CHECK-NEXT: EMIT vp<%6> = first-order splice ir<%recur> ir<%recur.next> ; CHECK-NEXT: Successor(s): pred.srem ; CHECK-EMPTY: ; CHECK-NEXT: pred.srem: { @@ -158,19 +161,19 @@ ; CHECK-NEXT: CondBit: vp<%4> (loop) ; CHECK-EMPTY: ; CHECK-NEXT: pred.srem.if: -; CHECK-NEXT: REPLICATE ir<%rem> = srem ir<%recur>, ir<%x> (S->V) +; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<%6>, ir<%x> (S->V) ; CHECK-NEXT: Successor(s): pred.srem.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.srem.continue: -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%7> = ir<%rem> +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%8> = ir<%rem> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): loop.0.split ; CHECK-EMPTY: ; CHECK-NEXT: loop.0.split: -; CHECK-NEXT: WIDEN ir<%add> = add vp<%7>, ir<%recur.next> +; CHECK-NEXT: WIDEN ir<%add> = add vp<%8>, ir<%recur.next> ; CHECK-NEXT: WIDEN ir<%and.red.next> = and ir<%and.red>, ir<%add> -; CHECK-NEXT: EMIT vp<%10> = select vp<%4> ir<%and.red.next> ir<%and.red> +; CHECK-NEXT: EMIT vp<%11> = select vp<%4> ir<%and.red.next> ir<%and.red> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -200,7 +203,7 @@ ; CHECK-LABEL: sink_replicate_region_4_requires_split_at_end_of_block ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: loop: -; CHECK-NEXT: WIDEN-PHI ir<%0> = phi ir<0>, ir<%conv> +; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%0> = phi ir<0>, ir<%conv> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: EMIT vp<%3> = icmp ule 
ir<%iv> vp<%0> ; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, ir<%iv> @@ -227,6 +230,7 @@ ; CHECK-EMPTY: ; CHECK-NEXT: loop.1: ; CHECK-NEXT: WIDEN ir<%conv> = sext vp<%6> +; CHECK-NEXT: EMIT vp<%8> = first-order splice ir<%0> ir<%conv> ; CHECK-NEXT: Successor(s): loop.1.split ; CHECK: loop.1.split: @@ -239,19 +243,19 @@ ; CHECK-NEXT: CondBit: vp<%3> (loop) ; CHECK: pred.load.if: -; CHECK-NEXT: REPLICATE ir<%rem> = srem ir<%0>, ir<%x> (S->V) +; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<%8>, ir<%x> (S->V) ; CHECK-NEXT: REPLICATE ir<%lv.2> = load ir<%gep> (S->V) ; CHECK-NEXT: Successor(s): pred.load.continue ; CHECK: pred.load.continue: -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%10> = ir<%rem> -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%11> = ir<%lv.2> +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%11> = ir<%rem> +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%12> = ir<%lv.2> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK: loop.2: -; CHECK-NEXT: WIDEN ir<%add.1> = add ir<%conv>, vp<%10> -; CHECK-NEXT: WIDEN ir<%conv.lv.2> = sext vp<%11> +; CHECK-NEXT: WIDEN ir<%add.1> = add ir<%conv>, vp<%11> +; CHECK-NEXT: WIDEN ir<%conv.lv.2> = sext vp<%12> ; CHECK-NEXT: WIDEN ir<%add> = add ir<%add.1>, ir<%conv.lv.2> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -283,7 +287,7 @@ ; CHECK-LABEL: sink_replicate_region_after_replicate_region ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: loop: -; CHECK-NEXT: WIDEN-PHI ir<%recur> = phi ir<0>, ir<%recur.next> +; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: EMIT vp<%3> = icmp ule ir<%iv> vp<%0> ; CHECK-NEXT: Successor(s): loop.0 @@ -293,6 +297,7 @@ ; CHECK-EMPTY: ; CHECK-NEXT: loop.1: ; CHECK-NEXT: WIDEN ir<%recur.next> = sext ir<%y> +; CHECK-NEXT: EMIT vp<%5> = first-order splice ir<%recur> ir<%recur.next> ; CHECK-NEXT: Successor(s): pred.srem ; CHECK-EMPTY: ; CHECK-NEXT: pred.srem: { @@ -302,11 +307,11 
@@ ; CHECK-NEXT: CondBit: vp<%3> (loop) ; CHECK-EMPTY: ; CHECK-NEXT: pred.srem.if: -; CHECK-NEXT: REPLICATE ir<%rem> = srem ir<%recur>, ir<%x> +; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<%5>, ir<%x> ; CHECK-NEXT: Successor(s): pred.srem.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.srem.continue: -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%6> = ir<%rem> +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%7> = ir<%rem> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): loop.1.split @@ -321,13 +326,13 @@ ; CHECK-NEXT: CondBit: vp<%3> (loop) ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.if: -; CHECK-NEXT: REPLICATE ir<%rem.div> = sdiv ir<20>, vp<%6> +; CHECK-NEXT: REPLICATE ir<%rem.div> = sdiv ir<20>, vp<%7> ; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, ir<%iv> ; CHECK-NEXT: REPLICATE store ir<%rem.div>, ir<%gep> ; CHECK-NEXT: Successor(s): pred.store.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.continue: -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%10> = ir<%rem.div> +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%11> = ir<%rem.div> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): loop.2 diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -337,7 +337,8 @@ ; UNROLL-NO-IC-LABEL: @constant_folded_previous_value( ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ , %vector.ph ], [ , %vector.body ] -; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> , <4 x i32> +; UNROLL-NO-IC: [[TMP0:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> , <4 x i32> +; UNROLL-NO-IC-NEXT: add nuw i64 ; UNROLL-NO-IC: br i1 {{.*}}, label %middle.block, label %vector.body ; define void @constant_folded_previous_value() { diff --git 
a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll @@ -1538,11 +1538,11 @@ ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i16>, <8 x i16>* [[TMP10]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i16> [[WIDE_VEC]], <8 x i16> poison, <4 x i32> ; CHECK-NEXT: [[STRIDED_VEC8]] = shufflevector <8 x i16> [[WIDE_VEC]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[STRIDED_VEC8]], <4 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = sext <4 x i16> [[STRIDED_VEC]] to <4 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = sext <4 x i16> [[TMP11]] to <4 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = sext <4 x i16> [[STRIDED_VEC]] to <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[STRIDED_VEC8]], <4 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = sext <4 x i16> [[TMP12]] to <4 x i32> ; CHECK-NEXT: [[TMP14:%.*]] = sext <4 x i16> [[STRIDED_VEC8]] to <4 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = mul nsw <4 x i32> [[TMP13]], [[TMP12]] +; CHECK-NEXT: [[TMP15:%.*]] = mul nsw <4 x i32> [[TMP13]], [[TMP11]] ; CHECK-NEXT: [[TMP16:%.*]] = mul nsw <4 x i32> [[TMP15]], [[TMP14]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32* [[TMP17]] to <4 x i32>* diff --git a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll --- a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll @@ -172,8 +172,9 @@ ; CHECK-VF4UF2-LABEL: @constant_folded_previous_value ; CHECK-VF4UF2: vector.body ; CHECK-VF4UF2: %[[VECTOR_RECUR:.*]] = phi [ %vector.recur.init, %vector.ph ], [ 
shufflevector ( insertelement ( undef, i64 1, i32 0), undef, zeroinitializer), %vector.body ] -; CHECK-VF4UF2-NEXT: %[[SPLICE1:.*]] = call @llvm.experimental.vector.splice.nxv4i64( %vector.recur, shufflevector ( insertelement ( undef, i64 1, i32 0), undef, zeroinitializer), i32 -1) +; CHECK-VF4UF2: %[[SPLICE1:.*]] = call @llvm.experimental.vector.splice.nxv4i64( %vector.recur, shufflevector ( insertelement ( undef, i64 1, i32 0), undef, zeroinitializer), i32 -1) ; CHECK-VF4UF2: %[[SPLICE2:.*]] = call @llvm.experimental.vector.splice.nxv4i64( shufflevector ( insertelement ( undef, i64 1, i32 0), undef, zeroinitializer), shufflevector ( insertelement ( undef, i64 1, i32 0), undef, zeroinitializer), i32 -1) +; CHECK-VF4UF2: br i1 {{.*}}, label %middle.block, label %vector.body entry: br label %scalar.body diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -766,7 +766,7 @@ ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: loop: ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next -; CHECK-NEXT: WIDEN-PHI ir<%for> = phi ir<0>, ir<%lv.a> +; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for> = phi ir<0>, ir<%lv.a> ; CHECK-NEXT: EMIT vp<%3> = icmp ule ir<%iv> vp<%0> ; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, ir<%iv> ; CHECK-NEXT: Successor(s): pred.load @@ -788,6 +788,7 @@ ; CHECK-NEXT: Successor(s): loop.0 ; CHECK-EMPTY: ; CHECK-NEXT: loop.0: +; CHECK-NEXT: EMIT vp<%7> = first-order splice ir<%for> ir<%lv.a> ; CHECK-NEXT: Successor(s): loop.1 ; CHECK-EMPTY: ; CHECK-NEXT: loop.1: @@ -800,12 +801,12 @@ ; CHECK-NEXT: CondBit: vp<%3> (loop) ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.if: -; CHECK-NEXT: REPLICATE ir<%div> = sdiv ir<%for>, vp<%6> +; CHECK-NEXT: REPLICATE ir<%div> = sdiv vp<%7>, vp<%6> ; 
CHECK-NEXT: REPLICATE store ir<%div>, ir<%gep.a> ; CHECK-NEXT: Successor(s): pred.store.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.continue: -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%9> = ir<%div> +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%10> = ir<%div> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): loop.2