diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3101,32 +3101,12 @@ BrInst->setDebugLoc(ScalarLatchTerm->getDebugLoc()); ReplaceInstWithInst(LoopMiddleBlock->getTerminator(), BrInst); - // We intentionally don't let SplitBlock to update LoopInfo since - // LoopVectorBody should belong to another loop than LoopVectorPreHeader. - // LoopVectorBody is explicitly added to the correct place few lines later. - BasicBlock *LoopVectorBody = - SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT, - nullptr, nullptr, Twine(Prefix) + "vector.body"); - // Update dominator for loop exit. if (!Cost->requiresScalarEpilogue(VF)) // If there is an epilogue which must run, there's no edge from the // middle block to exit blocks and thus no need to update the immediate // dominator of the exit blocks. DT->changeImmediateDominator(LoopExitBlock, LoopMiddleBlock); - - // Create and register the new vector loop. - Loop *Lp = LI->AllocateLoop(); - Loop *ParentLoop = OrigLoop->getParentLoop(); - - // Insert the new loop into the loop nest and register the new basic blocks - // before calling any utilities such as SCEV that require valid LoopInfo. - if (ParentLoop) { - ParentLoop->addChildLoop(Lp); - } else { - LI->addTopLevelLoop(Lp); - } - Lp->addBasicBlockToLoop(LoopVectorBody, *LI); } void InnerLoopVectorizer::createInductionResumeValues( @@ -8733,7 +8713,9 @@ VPBasicBlock *LatchVPBB = new VPBasicBlock("vector.latch"); VPBlockUtils::insertBlockAfter(LatchVPBB, HeaderVPBB); auto *TopRegion = new VPRegionBlock(HeaderVPBB, LatchVPBB, "vector loop"); - auto Plan = std::make_unique(TopRegion); + VPBasicBlock *Preheader = new VPBasicBlock("vector.preheader"); + VPBlockUtils::insertBlockAfter(TopRegion, Preheader); + auto Plan = std::make_unique(Preheader); Instruction *DLInst = getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()); @@ -9503,10 +9485,14 @@ } LastInduction->setName("vec.ind.next"); - VecInd->addIncoming(SteppedStart, State.CFG.VectorPreHeader); + BasicBlock *VectorPreHeader = + State.CFG.VPBB2IRBB[getParent() + ->getSingleHierarchicalPredecessor() + ->getExitBasicBlock()]; + VecInd->addIncoming(SteppedStart, VectorPreHeader); // Add induction update using an incorrect block temporarily. The phi node // will be fixed after VPlan execution. - VecInd->addIncoming(LastInduction, State.CFG.VectorPreHeader); + VecInd->addIncoming(LastInduction, VectorPreHeader); } void VPWidenPointerInductionRecipe::execute(VPTransformState &State) { @@ -9561,7 +9547,12 @@ Type *ScStValueType = ScalarStartValue->getType(); PHINode *NewPointerPhi = PHINode::Create(ScStValueType, 2, "pointer.phi", CanonicalIV); - NewPointerPhi->addIncoming(ScalarStartValue, State.CFG.VectorPreHeader); + + BasicBlock *VectorPreHeader = + State.CFG.VPBB2IRBB[getParent() + ->getSingleHierarchicalPredecessor() + ->getExitBasicBlock()]; + NewPointerPhi->addIncoming(ScalarStartValue, VectorPreHeader); // A pointer induction, performed by using a gep const DataLayout &DL = Phi->getModule()->getDataLayout(); @@ -9576,7 +9567,7 @@ IndDesc.getElementType(), NewPointerPhi, State.Builder.CreateMul(ScalarStepValue, NumUnrolledElems), "ptr.ind", &*State.Builder.GetInsertPoint()); - NewPointerPhi->addIncoming(InductionGEP, State.CFG.VectorPreHeader); + NewPointerPhi->addIncoming(InductionGEP, VectorPreHeader); // Create UF many actual address geps that use the pointer // phi as base and a vectorized version of the step value diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2645,10 +2645,10 @@ /// Returns the VPRegionBlock of the vector loop. VPRegionBlock *getVectorLoopRegion() { - return cast(getEntry()); + return cast(getEntry()->getSingleSuccessor()); } const VPRegionBlock *getVectorLoopRegion() const { - return cast(getEntry()); + return cast(getEntry()->getSingleSuccessor()); } /// Returns the canonical induction recipe of the vector loop. diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -313,6 +313,7 @@ if (PrevVPBB && /* A */ !((SingleHPred = getSingleHierarchicalPredecessor()) && SingleHPred->getExitBasicBlock() == PrevVPBB && + this != getPlan()->getVectorLoopRegion()->getEntryBasicBlock() && PrevVPBB->getSingleHierarchicalSuccessor()) && /* B */ !(Replica && getPredecessors().empty())) { /* C */ NewBB = createEmptyBasicBlock(State->CFG); @@ -449,6 +450,18 @@ ReversePostOrderTraversal RPOT(Entry); if (!isReplicator()) { + // Create and register the new vector loop. + State->CurrentVectorLoop = State->LI->AllocateLoop(); + Loop *ParentLoop = State->LI->getLoopFor(State->CFG.VectorPreHeader); + + // Insert the new loop into the loop nest and register the new basic blocks + // before calling any utilities such as SCEV that require valid LoopInfo. + if (ParentLoop) { + ParentLoop->addChildLoop(State->CurrentVectorLoop); + } else { + State->LI->addTopLevelLoop(State->CurrentVectorLoop); + } + // Visit the VPBlocks connected to "this", starting from it. for (VPBlockBase *Block : RPOT) { if (EnableVPlanNativePath) { @@ -906,24 +919,20 @@ BasicBlock *VectorPreHeaderBB = State->CFG.PrevBB; State->CFG.VectorPreHeader = VectorPreHeaderBB; - BasicBlock *VectorHeaderBB = VectorPreHeaderBB->getSingleSuccessor(); - assert(VectorHeaderBB && "Loop preheader does not have a single successor."); - - Loop *L = State->LI->getLoopFor(VectorHeaderBB); - State->CurrentVectorLoop = L; // Remove the edge between Header and Latch to allow other connections. // Temporarily terminate with unreachable until CFG is rewired. // Note: this asserts the generated code's assumption that // getFirstInsertionPt() can be dereferenced into an Instruction. - VectorHeaderBB->getTerminator()->eraseFromParent(); - State->Builder.SetInsertPoint(VectorHeaderBB); + + VectorPreHeaderBB->getTerminator()->eraseFromParent(); + State->Builder.SetInsertPoint(VectorPreHeaderBB); UnreachableInst *Terminator = State->Builder.CreateUnreachable(); State->Builder.SetInsertPoint(Terminator); // 1. Generate code in loop body. State->CFG.PrevVPBB = nullptr; - State->CFG.PrevBB = VectorHeaderBB; + State->CFG.PrevBB = VectorPreHeaderBB; for (VPBlockBase *Block : depth_first(Entry)) Block->execute(State); @@ -1002,10 +1011,14 @@ } } + BasicBlock *VectorHeaderBB = State->CFG.VPBB2IRBB[Header]; + VectorHeaderBB->setName("vector.body"); // We do not attempt to preserve DT for outer loop vectorization currently. - if (!EnableVPlanNativePath) + if (!EnableVPlanNativePath) { + State->DT->addNewBlock(VectorHeaderBB, VectorPreHeaderBB); updateDominatorTree(State->DT, VectorHeaderBB, VectorLatchBB, State->CFG.LastBB); + } } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -1427,7 +1440,10 @@ Value *Start = getStartValue()->getLiveInIRValue(); PHINode *EntryPart = PHINode::Create( Start->getType(), 2, "index", &*State.CFG.PrevBB->getFirstInsertionPt()); - EntryPart->addIncoming(Start, State.CFG.VectorPreHeader); + EntryPart->addIncoming( + Start, State.CFG.VPBB2IRBB[getParent() + ->getSingleHierarchicalPredecessor() + ->getExitBasicBlock()]); EntryPart->setDebugLoc(DL); for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) State.set(this, EntryPart, Part); @@ -1512,10 +1528,14 @@ PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init"); } + BasicBlock *VectorPreHeader = + State.CFG.VPBB2IRBB[getParent() + ->getSingleHierarchicalPredecessor() + ->getExitBasicBlock()]; // Create a phi node for the new recurrence. PHINode *EntryPart = PHINode::Create( VecTy, 2, "vector.recur", &*State.CFG.PrevBB->getFirstInsertionPt()); - EntryPart->addIncoming(VectorInit, State.CFG.VectorPreHeader); + EntryPart->addIncoming(VectorInit, VectorPreHeader); State.set(this, EntryPart, 0); } @@ -1551,6 +1571,11 @@ State.set(this, EntryPart, Part); } + BasicBlock *VectorPreHeader = + State.CFG.VPBB2IRBB[getParent() + ->getSingleHierarchicalPredecessor() + ->getExitBasicBlock()]; + // Reductions do not have to start at zero. They can start with // any loop invariant values. VPValue *StartVPV = getStartValue(); @@ -1565,7 +1590,7 @@ Iden = StartV; } else { IRBuilderBase::InsertPointGuard IPBuilder(Builder); - Builder.SetInsertPoint(State.CFG.VectorPreHeader->getTerminator()); + Builder.SetInsertPoint(VectorPreHeader->getTerminator()); StartV = Iden = Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident"); } @@ -1576,7 +1601,7 @@ if (!ScalarPHI) { Iden = Builder.CreateVectorSplat(State.VF, Iden); IRBuilderBase::InsertPointGuard IPBuilder(Builder); - Builder.SetInsertPoint(State.CFG.VectorPreHeader->getTerminator()); + Builder.SetInsertPoint(VectorPreHeader->getTerminator()); Constant *Zero = Builder.getInt32(0); StartV = Builder.CreateInsertElement(Iden, StartV, Zero); } @@ -1587,7 +1612,7 @@ // Make sure to add the reduction start value only to the // first unroll part. Value *StartVal = (Part == 0) ? StartV : Iden; - cast(EntryPart)->addIncoming(StartVal, State.CFG.VectorPreHeader); + cast(EntryPart)->addIncoming(StartVal, VectorPreHeader); } }