diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3102,10 +3102,9 @@ BrInst->setDebugLoc(ScalarLatchTerm->getDebugLoc()); ReplaceInstWithInst(LoopMiddleBlock->getTerminator(), BrInst); - SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT, LI, - nullptr, Twine(Prefix) + "vector.body"); - - // Update dominator for loop exit. + // Update dominator for loop exit. During skeleton creation, only the vector + // pre-header and the middle block are created. The vector loop is entirely + // created during VPlan execution. if (!Cost->requiresScalarEpilogue(VF)) // If there is an epilogue which must run, there's no edge from the // middle block to exit blocks and thus no need to update the immediate @@ -3244,7 +3243,7 @@ |/ | | v | [ ] \ - | [ ]_| <-- vector loop. + | [ ]_| <-- vector loop (created during VPlan execution). | | | v \ -[ ] <--- middle-block. @@ -7600,10 +7599,11 @@ // Perform the actual loop transformation. - // 1. Create a new empty loop. Unlink the old loop and connect the new one. + // 1. Set up the skeleton for vectorization, including vector pre-header and + // middle block. The vector loop is created during VPlan execution. 
VPTransformState State{BestVF, BestUF, LI, DT, ILV.Builder, &ILV, &BestVPlan}; Value *CanonicalIVStartValue; - std::tie(State.CFG.VectorPreHeader, CanonicalIVStartValue) = + std::tie(State.CFG.PrevBB, CanonicalIVStartValue) = ILV.createVectorizedLoopSkeleton(); ILV.collectPoisonGeneratingRecipes(State); @@ -8670,8 +8670,6 @@ auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(StartV, DL); VPRegionBlock *TopRegion = Plan.getVectorLoopRegion(); VPBasicBlock *Header = TopRegion->getEntryBasicBlock(); - if (IsVPlanNative) - Header = cast(Header->getSingleSuccessor()); Header->insert(CanonicalIVPHI, Header->begin()); auto *CanonicalIVIncrement = @@ -8681,10 +8679,8 @@ CanonicalIVPHI->addOperand(CanonicalIVIncrement); VPBasicBlock *EB = TopRegion->getExitBasicBlock(); - if (IsVPlanNative) { - EB = cast(EB->getSinglePredecessor()); + if (IsVPlanNative) EB->setCondBit(nullptr); - } EB->appendRecipe(CanonicalIVIncrement); auto *BranchOnCount = @@ -8753,12 +8749,17 @@ // visit each basic block after having visited its predecessor basic blocks. // --------------------------------------------------------------------------- - // Create initial VPlan skeleton, with separate header and latch blocks. - VPBasicBlock *HeaderVPBB = new VPBasicBlock(); + // Create initial VPlan skeleton, starting with a block for the pre-header, + // followed by a region for the vector loop. The skeleton vector loop region + // contains a header and latch block. 
+ VPBasicBlock *Preheader = new VPBasicBlock("vector.ph"); + auto Plan = std::make_unique(Preheader); + + VPBasicBlock *HeaderVPBB = new VPBasicBlock("vector.body"); VPBasicBlock *LatchVPBB = new VPBasicBlock("vector.latch"); VPBlockUtils::insertBlockAfter(LatchVPBB, HeaderVPBB); auto *TopRegion = new VPRegionBlock(HeaderVPBB, LatchVPBB, "vector loop"); - auto Plan = std::make_unique(TopRegion); + VPBlockUtils::insertBlockAfter(TopRegion, Preheader); Instruction *DLInst = getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()); @@ -8777,7 +8778,8 @@ // Relevant instructions from basic block BB will be grouped into VPRecipe // ingredients and fill a new VPBasicBlock. unsigned VPBBsForBB = 0; - VPBB->setName(BB->getName()); + if (VPBB != HeaderVPBB) + VPBB->setName(BB->getName()); Builder.setInsertPoint(VPBB); // Introduce each ingredient into VPlan. @@ -9065,6 +9067,31 @@ [this](PHINode *P) { return Legal->getIntOrFpInductionDescriptor(P); }, DeadInstructions, *PSE.getSE()); + // Update plan to be compatible with the inner loop vectorizer for + // code-generation. + VPRegionBlock *LoopRegion = Plan->getVectorLoopRegion(); + VPBasicBlock *Preheader = LoopRegion->getEntryBasicBlock(); + VPBasicBlock *Exit = LoopRegion->getExitBasicBlock(); + VPBlockBase *Latch = Exit->getSinglePredecessor(); + VPBlockBase *Header = Preheader->getSingleSuccessor(); + + // 1. Move preheader block out of main vector loop. + Preheader->setParent(LoopRegion->getParent()); + VPBlockUtils::disconnectBlocks(Preheader, Header); + VPBlockUtils::connectBlocks(Preheader, LoopRegion); + Plan->setEntry(Preheader); + + // 2. Disconnect backedge and exit block. + VPBlockUtils::disconnectBlocks(Latch, Header); + VPBlockUtils::disconnectBlocks(Latch, Exit); + + // 3. Update entry and exit of main vector loop region. + LoopRegion->setEntry(Header); + LoopRegion->setExit(Latch); + + // 4. Remove exit block. 
+ delete Exit; + addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), DebugLoc(), true, true); return Plan; @@ -9442,13 +9469,14 @@ auto &DL = EntryVal->getModule()->getDataLayout(); + BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); // Generate code for the induction step. Note that induction steps are // required to be loop-invariant auto CreateStepValue = [&](const SCEV *Step) -> Value * { if (SE.isSCEVable(IV->getType())) { SCEVExpander Exp(SE, DL, "induction"); return Exp.expandCodeFor(Step, Step->getType(), - State.CFG.VectorPreHeader->getTerminator()); + VectorPH->getTerminator()); } return cast(Step)->getValue(); }; @@ -9466,7 +9494,7 @@ // Construct the initial value of the vector IV in the vector loop preheader auto CurrIP = Builder.saveIP(); - Builder.SetInsertPoint(State.CFG.VectorPreHeader->getTerminator()); + Builder.SetInsertPoint(VectorPH->getTerminator()); if (isa(EntryVal)) { assert(Start->getType()->isIntegerTy() && "Truncation requires an integer type"); @@ -9530,13 +9558,13 @@ } LastInduction->setName("vec.ind.next"); - VecInd->addIncoming(SteppedStart, State.CFG.VectorPreHeader); + VecInd->addIncoming(SteppedStart, VectorPH); // Add induction update using an incorrect block temporarily. The phi node // will be fixed after VPlan execution. Note that at this point the latch // block cannot be used, as it does not exist yet. // TODO: Model increment value in VPlan, by turning the recipe into a // multi-def and a subclass of VPHeaderPHIRecipe. 
- VecInd->addIncoming(LastInduction, State.CFG.VectorPreHeader); + VecInd->addIncoming(LastInduction, VectorPH); } void VPWidenPointerInductionRecipe::execute(VPTransformState &State) { @@ -9591,7 +9619,9 @@ Type *ScStValueType = ScalarStartValue->getType(); PHINode *NewPointerPhi = PHINode::Create(ScStValueType, 2, "pointer.phi", CanonicalIV); - NewPointerPhi->addIncoming(ScalarStartValue, State.CFG.VectorPreHeader); + + BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); + NewPointerPhi->addIncoming(ScalarStartValue, VectorPH); // A pointer induction, performed by using a gep const DataLayout &DL = NewPointerPhi->getModule()->getDataLayout(); @@ -9612,7 +9642,7 @@ // block cannot be used, as it does not exist yet. // TODO: Model increment value in VPlan, by turning the recipe into a // multi-def and a subclass of VPHeaderPHIRecipe. - NewPointerPhi->addIncoming(InductionGEP, State.CFG.VectorPreHeader); + NewPointerPhi->addIncoming(InductionGEP, VectorPH); // Create UF many actual address geps that use the pointer // phi as base and a vectorized version of the step value diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -311,13 +311,6 @@ /// vector loop. BasicBlock *ExitBB = nullptr; - /// The IR BasicBlock that is the preheader of the vector loop in the output - /// IR. - /// FIXME: The vector preheader should also be modeled in VPlan, so any code - /// that needs to be added to the preheader gets directly generated by - /// VPlan. There should be no need to manage a pointer to the IR BasicBlock. - BasicBlock *VectorPreHeader = nullptr; - /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case /// of replication, maps the BasicBlock of the last replica created. 
SmallDenseMap VPBB2IRBB; @@ -327,6 +320,10 @@ SmallVector VPBBsToFix; CFGState() = default; + + /// Returns the BasicBlock* mapped to the pre-header of the loop region + /// containing \p R. + BasicBlock *getPreheaderBBFor(VPRecipeBase *R); } CFG; /// Hold a pointer to LoopInfo to register new basic blocks in the loop. @@ -2077,6 +2074,8 @@ /// SplitAt to the new block. Returns the new block. VPBasicBlock *splitAt(iterator SplitAt); + VPRegionBlock *getEnclosingLoopRegion(); + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Print this VPBsicBlock to \p O, prefixing all lines with \p Indent. \p /// SlotTracker is used to print unnamed VPValue's using consequtive numbers. @@ -2170,6 +2169,12 @@ ExitBlock->setParent(this); } + /// Returns the pre-header VPBasicBlock of the loop region. + VPBasicBlock *getPreheaderVPBB() { + assert(!isReplicator() && "should only get pre-header of loop regions"); + return getSinglePredecessor()->getExitBasicBlock(); + } + /// An indicator whether this region is to generate multiple replicated /// instances of output IR corresponding to its VPBlockBases. bool isReplicator() const { return IsReplicator; } @@ -2647,10 +2652,14 @@ /// Returns the VPRegionBlock of the vector loop. VPRegionBlock *getVectorLoopRegion() { - return cast(getEntry()); + if (auto *R = dyn_cast(getEntry())) + return R; + return cast(getEntry()->getSingleSuccessor()); } const VPRegionBlock *getVectorLoopRegion() const { - return cast(getEntry()); + if (auto *R = dyn_cast(getEntry())) + return R; + return cast(getEntry()->getSingleSuccessor()); } /// Returns the canonical induction recipe of the vector loop. 
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -245,6 +245,10 @@ // set(Def, Extract, Instance); return Extract; } +BasicBlock *VPTransformState::CFGState::getPreheaderBBFor(VPRecipeBase *R) { + VPRegionBlock *LoopRegion = R->getParent()->getEnclosingLoopRegion(); + return VPBB2IRBB[LoopRegion->getPreheaderVPBB()]; +} BasicBlock * VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) { @@ -277,20 +281,34 @@ assert(PredBB && "Predecessor basic-block not found building successor."); auto *PredBBTerminator = PredBB->getTerminator(); LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n'); - if (isa(PredBBTerminator)) { + + auto *TermBr = dyn_cast(PredBBTerminator); + if (isa(PredBBTerminator) || + (TermBr && !TermBr->isConditional())) { assert(PredVPSuccessors.size() == 1 && "Predecessor ending w/o branch must have single successor."); - DebugLoc DL = PredBBTerminator->getDebugLoc(); - PredBBTerminator->eraseFromParent(); - auto *Br = BranchInst::Create(NewBB, PredBB); - Br->setDebugLoc(DL); + if (TermBr) { + TermBr->setSuccessor(0, NewBB); + } else { + DebugLoc DL = PredBBTerminator->getDebugLoc(); + PredBBTerminator->eraseFromParent(); + auto *Br = BranchInst::Create(NewBB, PredBB); + Br->setDebugLoc(DL); + } } else { - assert(PredVPSuccessors.size() == 2 && - "Predecessor ending with branch must have two successors."); - unsigned idx = PredVPSuccessors.front() == this ? 0 : 1; - assert(!PredBBTerminator->getSuccessor(idx) && - "Trying to reset an existing successor block."); - PredBBTerminator->setSuccessor(idx, NewBB); + if (PredVPSuccessors.size() == 2) { + unsigned idx = PredVPSuccessors.front() == this ? 
0 : 1; + assert(!PredBBTerminator->getSuccessor(idx) && + "Trying to reset an existing successor block."); + PredBBTerminator->setSuccessor(idx, NewBB); + } else { + auto *Reg = dyn_cast(PredVPBB->getParent()); + assert(Reg && !Reg->isReplicator()); + assert(this == Reg->getSingleSuccessor()); + PredBBTerminator->setSuccessor(0, NewBB); + PredBBTerminator->setSuccessor( + 1, CFG.VPBB2IRBB[Reg->getEntryBasicBlock()]); + } } } return NewBB; @@ -302,40 +320,36 @@ VPBlockBase *SingleHPred = nullptr; BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible. + auto IsNonReplicateR = [](VPBlockBase *BB) { + auto *R = dyn_cast(BB); + return R && !R->isReplicator(); + }; + // 1. Create an IR basic block, or reuse the last one if possible. // The last IR basic block is reused, as an optimization, in three cases: - // A. the first VPBB reuses the loop header BB - when PrevVPBB is null; + // A. the first VPBB reuses the loop pre-header BB - when PrevVPBB is null; // B. when the current VPBB has a single (hierarchical) predecessor which - // is PrevVPBB and the latter has a single (hierarchical) successor; and + // is PrevVPBB and the latter has a single (hierarchical) successor which + // both are in the same non-replicator region; and // C. when the current VPBB is an entry of a region replica - where PrevVPBB // is the exit of this region from a previous instance, or the predecessor // of this region. 
if (PrevVPBB && /* A */ !((SingleHPred = getSingleHierarchicalPredecessor()) && SingleHPred->getExitBasicBlock() == PrevVPBB && - PrevVPBB->getSingleHierarchicalSuccessor()) && /* B */ - !(Replica && getPredecessors().empty())) { /* C */ + PrevVPBB->getSingleHierarchicalSuccessor() && + (SingleHPred->getParent() == getEnclosingLoopRegion() && + !IsNonReplicateR(SingleHPred))) && /* B */ + !(Replica && getPredecessors().empty())) { /* C */ NewBB = createEmptyBasicBlock(State->CFG); State->Builder.SetInsertPoint(NewBB); // Temporarily terminate with unreachable until CFG is rewired. UnreachableInst *Terminator = State->Builder.CreateUnreachable(); // Register NewBB in its loop. In innermost loops its the same for all BB's. - State->CurrentVectorLoop->addBasicBlockToLoop(NewBB, *State->LI); + if (State->CurrentVectorLoop) + State->CurrentVectorLoop->addBasicBlockToLoop(NewBB, *State->LI); State->Builder.SetInsertPoint(Terminator); State->CFG.PrevBB = NewBB; - } else { - // If the current VPBB is re-using the header block from skeleton creation, - // move it to the new vector loop. - VPBasicBlock *HeaderVPBB = - getPlan()->getVectorLoopRegion()->getEntryBasicBlock(); - if (EnableVPlanNativePath) - HeaderVPBB = cast(HeaderVPBB->getSingleSuccessor()); - if (this == HeaderVPBB) { - assert(State->CurrentVectorLoop); - State->LI->removeBlock(State->CFG.PrevBB); - State->CurrentVectorLoop->addBasicBlockToLoop(State->CFG.PrevBB, - *State->LI); - } } // 2. Fill the IR basic block with IR instructions. 
@@ -409,6 +423,16 @@ return SplitBlock; } +VPRegionBlock *VPBasicBlock::getEnclosingLoopRegion() { + VPRegionBlock *P = getParent(); + if (P && P->isReplicator()) { + P = P->getParent(); + assert(!cast(P)->isReplicator() && + "unexpected nested replicate regions"); + } + return P; +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPBlockBase::printSuccessors(raw_ostream &O, const Twine &Indent) const { if (getSuccessors().empty()) { @@ -465,7 +489,8 @@ // Create and register the new vector loop. Loop *PrevLoop = State->CurrentVectorLoop; State->CurrentVectorLoop = State->LI->AllocateLoop(); - Loop *ParentLoop = State->LI->getLoopFor(State->CFG.VectorPreHeader); + BasicBlock *VectorPH = State->CFG.VPBB2IRBB[getPreheaderVPBB()]; + Loop *ParentLoop = State->LI->getLoopFor(VectorPH); // Insert the new loop into the loop nest and register the new basic blocks // before calling any utilities such as SCEV that require valid LoopInfo. @@ -476,20 +501,6 @@ // Visit the VPBlocks connected to "this", starting from it. for (VPBlockBase *Block : RPOT) { - if (EnableVPlanNativePath) { - // The inner loop vectorization path does not represent loop preheader - // and exit blocks as part of the VPlan. In the VPlan-native path, skip - // vectorizing loop preheader block. In future, we may replace this - // check with the check for loop preheader. - if (Block->getNumPredecessors() == 0) - continue; - - // Skip vectorizing loop exit block. In future, we may replace this - // check with the check for loop exit. - if (Block->getNumSuccessors() == 0) - continue; - } - LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n'); Block->execute(State); } @@ -886,7 +897,7 @@ // Check if the backedge taken count is needed, and if so build it. 
if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) { - IRBuilder<> Builder(State.CFG.VectorPreHeader->getTerminator()); + IRBuilder<> Builder(State.CFG.PrevBB->getTerminator()); auto *TCMO = Builder.CreateSub(TripCountV, ConstantInt::get(TripCountV->getType(), 1), "trip.count.minus.1"); @@ -923,9 +934,9 @@ } } -/// Generate the code inside the body of the vectorized loop. Assumes a single -/// LoopVectorBody basic-block was created for this. Introduce additional -/// basic-blocks as needed, and fill them all. +/// Generate the code inside the preheader and body of the vectorized loop. +/// Assumes a single pre-header basic-block was created for this. Introduce +/// additional basic-blocks as needed, and fill them all. void VPlan::execute(VPTransformState *State) { // Set the reverse mapping from VPValues to Values for code generation. for (auto &Entry : Value2VPValue) @@ -933,21 +944,11 @@ // Initialize CFG state. State->CFG.PrevVPBB = nullptr; - BasicBlock *VectorHeaderBB = State->CFG.VectorPreHeader->getSingleSuccessor(); - State->CFG.PrevBB = VectorHeaderBB; - State->CFG.ExitBB = VectorHeaderBB->getSingleSuccessor(); - State->CurrentVectorLoop = State->LI->getLoopFor(VectorHeaderBB); - - // Remove the edge between Header and Latch to allow other connections. - // Temporarily terminate with unreachable until CFG is rewired. - // Note: this asserts the generated code's assumption that - // getFirstInsertionPt() can be dereferenced into an Instruction. - VectorHeaderBB->getTerminator()->eraseFromParent(); - State->Builder.SetInsertPoint(VectorHeaderBB); - UnreachableInst *Terminator = State->Builder.CreateUnreachable(); - State->Builder.SetInsertPoint(Terminator); - - // Generate code in loop body. + State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor(); + BasicBlock *VectorPreHeader = State->CFG.PrevBB; + State->Builder.SetInsertPoint(VectorPreHeader->getTerminator()); + + // Generate code in the loop pre-header and body. 
for (VPBlockBase *Block : depth_first(Entry)) Block->execute(State); @@ -974,10 +975,6 @@ // Fix the latch value of canonical, reduction and first-order recurrences // phis in the vector loop. VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock(); - if (Header->empty()) { - assert(EnableVPlanNativePath); - Header = cast(Header->getSingleSuccessor()); - } for (VPRecipeBase &R : Header->phis()) { // Skip phi-like recipes that generate their backedege values themselves. if (isa(&R)) @@ -1029,9 +1026,12 @@ } // We do not attempt to preserve DT for outer loop vectorization currently. - if (!EnableVPlanNativePath) + if (!EnableVPlanNativePath) { + BasicBlock *VectorHeaderBB = State->CFG.VPBB2IRBB[Header]; + State->DT->addNewBlock(VectorHeaderBB, VectorPreHeader); updateDominatorTree(State->DT, VectorHeaderBB, VectorLatchBB, State->CFG.ExitBB); + } } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -1452,7 +1452,9 @@ Value *Start = getStartValue()->getLiveInIRValue(); PHINode *EntryPart = PHINode::Create( Start->getType(), 2, "index", &*State.CFG.PrevBB->getFirstInsertionPt()); - EntryPart->addIncoming(Start, State.CFG.VectorPreHeader); + + BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); + EntryPart->addIncoming(Start, VectorPH); EntryPart->setDebugLoc(DL); for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) State.set(this, EntryPart, Part); @@ -1469,11 +1471,12 @@ void VPExpandSCEVRecipe::execute(VPTransformState &State) { assert(!State.Instance && "cannot be used in per-lane"); - const DataLayout &DL = - State.CFG.VectorPreHeader->getModule()->getDataLayout(); + const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout(); SCEVExpander Exp(SE, DL, "induction"); - Value *Res = Exp.expandCodeFor(Expr, Expr->getType(), - State.CFG.VectorPreHeader->getTerminator()); + + BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); + Value *Res = + Exp.expandCodeFor(Expr, Expr->getType(), VectorPH->getTerminator()); for (unsigned Part = 
0, UF = State.UF; Part < UF; ++Part) State.set(this, Res, Part); @@ -1526,11 +1529,12 @@ ? VectorInit->getType() : VectorType::get(VectorInit->getType(), State.VF); + BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); if (State.VF.isVector()) { auto *IdxTy = Builder.getInt32Ty(); auto *One = ConstantInt::get(IdxTy, 1); IRBuilder<>::InsertPointGuard Guard(Builder); - Builder.SetInsertPoint(State.CFG.VectorPreHeader->getTerminator()); + Builder.SetInsertPoint(VectorPH->getTerminator()); auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF); auto *LastIdx = Builder.CreateSub(RuntimeVF, One); VectorInit = Builder.CreateInsertElement( @@ -1540,7 +1544,7 @@ // Create a phi node for the new recurrence. PHINode *EntryPart = PHINode::Create( VecTy, 2, "vector.recur", &*State.CFG.PrevBB->getFirstInsertionPt()); - EntryPart->addIncoming(VectorInit, State.CFG.VectorPreHeader); + EntryPart->addIncoming(VectorInit, VectorPH); State.set(this, EntryPart, 0); } @@ -1576,6 +1580,8 @@ State.set(this, EntryPart, Part); } + BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); + // Reductions do not have to start at zero. They can start with // any loop invariant values. VPValue *StartVPV = getStartValue(); @@ -1590,7 +1596,7 @@ Iden = StartV; } else { IRBuilderBase::InsertPointGuard IPBuilder(Builder); - Builder.SetInsertPoint(State.CFG.VectorPreHeader->getTerminator()); + Builder.SetInsertPoint(VectorPH->getTerminator()); StartV = Iden = Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident"); } @@ -1601,7 +1607,7 @@ if (!ScalarPHI) { Iden = Builder.CreateVectorSplat(State.VF, Iden); IRBuilderBase::InsertPointGuard IPBuilder(Builder); - Builder.SetInsertPoint(State.CFG.VectorPreHeader->getTerminator()); + Builder.SetInsertPoint(VectorPH->getTerminator()); Constant *Zero = Builder.getInt32(0); StartV = Builder.CreateInsertElement(Iden, StartV, Zero); } @@ -1612,7 +1618,7 @@ // Make sure to add the reduction start value only to the // first unroll part. 
Value *StartVal = (Part == 0) ? StartV : Iden; - cast(EntryPart)->addIncoming(StartVal, State.CFG.VectorPreHeader); + cast(EntryPart)->addIncoming(StartVal, VectorPH); } } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll @@ -10,6 +10,9 @@ ; CHECK: VPlan 'Initial VPlan for VF={vscale x 2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION diff --git a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll --- a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll @@ -19,6 +19,7 @@ ; CHECK-NOT: LV: Found uniform instruction: %tmp3 = getelementptr inbounds %data, %data* %d, i64 0, i32 0, i64 %i ; CHECK-NOT: LV: Found uniform instruction: %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ] ; CHECK-NOT: LV: Found uniform instruction: %i.next = add nuw nsw i64 %i, 5 +; CHECK: define void @PR31671( ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x float> poison, float %x, i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x float> [[BROADCAST_SPLATINSERT]], <16 x float> poison, <16 x i32> zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll --- a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll @@ -136,8 +136,8 @@ ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[N]], 0 ; 
CHECK-NEXT: br i1 [[TMP17]], label [[DOT_CRIT_EDGE:%.*]], label [[DOTLR_PH_PREHEADER:%.*]] ; CHECK: .lr.ph.preheader: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH8:%.*]], label [[VECTOR_PH10:%.*]] -; CHECK: vector.ph10: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH8:%.*]], label [[VECTOR_PH9:%.*]] +; CHECK: vector.ph9: ; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[N]], -1 ; CHECK-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 ; CHECK-NEXT: [[N_RND_UP11:%.*]] = add nuw nsw i64 [[TMP19]], 4 @@ -145,8 +145,8 @@ ; CHECK-NEXT: [[BROADCAST_SPLATINSERT18:%.*]] = insertelement <4 x i64> poison, i64 [[TMP19]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT19:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT18]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY9:%.*]] -; CHECK: vector.body9: -; CHECK-NEXT: [[INDEX20:%.*]] = phi i64 [ 0, [[VECTOR_PH10]] ], [ [[INDEX_NEXT31:%.*]], [[PRED_STORE_CONTINUE30:%.*]] ] +; CHECK: vector.body19: +; CHECK-NEXT: [[INDEX20:%.*]] = phi i64 [ 0, [[VECTOR_PH9]] ], [ [[INDEX_NEXT31:%.*]], [[PRED_STORE_CONTINUE30:%.*]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[I_0_LCSSA]], [[INDEX20]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT21:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX20]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT22:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT21]], <4 x i64> poison, <4 x i32> zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -13,6 +13,9 @@ ; CHECK-EMPTY: ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: 
vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION @@ -98,6 +101,9 @@ ; CHECK-EMPTY: ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION @@ -168,6 +174,9 @@ ; CHECK-EMPTY: ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION @@ -238,6 +247,9 @@ ; CHECK-EMPTY: ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION @@ -333,6 +345,9 @@ ; CHECK-EMPTY: ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION diff --git a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll --- a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll +++ b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll @@ -41,6 +41,9 @@ ; CHECK-EMPTY: ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION diff --git a/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll --- 
a/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll @@ -7,7 +7,17 @@ ; Verify that -vplan-print-in-dot-format option works. define void @print_call_and_memory(i64 %n, float* noalias %y, float* noalias %x) nounwind uwtable { -; CHECK: subgraph cluster_N0 { +; CHECK: digraph VPlan { +; CHECK-NEXT: graph [labelloc=t, fontsize=30; label="Vectorization Plan\nInitial VPlan for VF=\{4\},UF\>=1"] +; CHECK-NEXT: node [shape=rect, fontname=Courier, fontsize=30] +; CHECK-NEXT: edge [fontname=Courier, fontsize=30] +; CHECK-NEXT: compound=true +; CHECK-NEXT: N0 [label = +; CHECK-NEXT: "vector.ph:\l" + +; CHECK-NEXT: "Successor(s): vector loop\l" +; CHECK-NEXT: ] +; CHECK-NEXT: N0 -> N1 [ label="" lhead=cluster_N2] +; CHECK-NEXT: subgraph cluster_N2 { ; CHECK-NEXT: fontname=Courier ; CHECK-NEXT: label="\ vector loop" ; CHECK-NEXT: N1 [label = diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -11,6 +11,9 @@ ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION @@ -51,6 +54,9 @@ ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION @@ -96,6 +102,9 @@ ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-EMPTY: +; CHECK-NEXT: 
vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION @@ -133,6 +142,9 @@ ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION @@ -207,6 +219,9 @@ ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION @@ -267,6 +282,9 @@ ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION @@ -307,6 +325,9 @@ ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION @@ -396,6 +417,9 @@ ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<%0> = vector-trip-count ; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll 
b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll @@ -10,6 +10,9 @@ ; CHECK: VPlan 'Initial VPlan for VF={1},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -15,6 +15,9 @@ ; CHECK-EMPTY: ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION @@ -82,6 +85,9 @@ ; CHECK-EMPTY: ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION @@ -162,6 +168,9 @@ ; CHECK-EMPTY: ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION @@ -244,6 +253,9 @@ ; CHECK-EMPTY: ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: ; CHECK-NEXT: 
vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION @@ -335,6 +347,9 @@ ; CHECK-EMPTY: ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION @@ -437,6 +452,9 @@ ; CHECK-EMPTY: ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION @@ -554,6 +572,9 @@ ; CHECK-EMPTY: ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION @@ -665,6 +686,9 @@ ; CHECK-EMPTY: ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION @@ -781,6 +805,9 @@ ; CHECK-EMPTY: ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION @@ -849,6 +876,9 @@ ; CHECK-EMPTY: ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION @@ -932,6 +962,9 @@ ; CHECK: VPlan 
'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION @@ -1003,6 +1036,9 @@ ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION