Index: lib/Transforms/Scalar/LICM.cpp =================================================================== --- lib/Transforms/Scalar/LICM.cpp +++ lib/Transforms/Scalar/LICM.cpp @@ -31,6 +31,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/LICM.h" +#include "llvm/ADT/SetOperations.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" @@ -70,6 +71,7 @@ #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" #include +#include #include using namespace llvm; @@ -103,7 +105,7 @@ const LoopSafetyInfo *SafetyInfo, TargetTransformInfo *TTI, bool &FreeInLoop); static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, - LoopSafetyInfo *SafetyInfo, + BasicBlock *Preheader, LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE); static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, const Loop *CurLoop, LoopSafetyInfo *SafetyInfo, @@ -433,6 +435,198 @@ return Changed; } +// This is a helper class for hoistRegion to make it able to hoist control flow +// in order to be able to hoist phis. The way this works is that we initially +// start hoisting to the loop preheader, and when we see a loop invariant branch +// we make note of this. When we then come to hoist an instruction that's +// conditional on such a branch we duplicate the branch and the relevant control +// flow, then hoist the instruction into the block corresponding to its original +// block in the duplicated control flow. +class ControlFlowHoister { +private: + // Information about the loop we are hoisting from + LoopInfo *LI; + DominatorTree *DT; + Loop *CurLoop; + + // A map of blocks in the loop to the block their instructions will be hoisted + // to. + DenseMap HoistDestinationMap; + + // The branches that we can hoist, mapped to the block that marks a + // convergence point of their control flow. + DenseMap HoistableBranches; + + // We make a copy of the dominator tree children of the original preheader as + // we will need it whenever we insert a new preheader. + std::vector OriginalPreheaderChildren; + +public: + ControlFlowHoister(LoopInfo *LI, DominatorTree *DT, Loop *CurLoop) + : LI(LI), DT(DT), CurLoop(CurLoop) { + BasicBlock *OriginalPreheader = CurLoop->getLoopPreheader(); + OriginalPreheaderChildren = DT->getNode(OriginalPreheader)->getChildren(); + } + + void registerPossiblyHoistableBranch(BranchInst *BI) { + // We can only hoist conditional branches with loop variant operands where + // both branch destinations are in the loop. + if (!BI->isConditional() || !CurLoop->hasLoopInvariantOperands(BI) || + !CurLoop->contains(BI->getSuccessor(0)) || + !CurLoop->contains(BI->getSuccessor(1))) + return; + + // We can hoist BI if one branch destination is the successor of the other, + // or both have common successor. Find the common successor by taking the + // intersection of the two successor sets, adding the block itself to its + // successors to handle the case where one is the successor of the other. + // TODO: This could be expanded to allowing branches where both ends + // eventually converge to a single block. + BasicBlock *TrueDest = BI->getSuccessor(0); + BasicBlock *FalseDest = BI->getSuccessor(1); + SmallPtrSet TrueDestSucc = {TrueDest}; + SmallPtrSet FalseDestSucc = {FalseDest}; + for (BasicBlock *SuccBB : successors(TrueDest)) + TrueDestSucc.insert(SuccBB); + for (BasicBlock *SuccBB : successors(FalseDest)) + FalseDestSucc.insert(SuccBB); + set_intersect(TrueDestSucc, FalseDestSucc); + if (!TrueDestSucc.empty()) { + BasicBlock *CommonSucc = *TrueDestSucc.begin(); + // The branch can't be a loop backedge, which we check by checking if the + // branch dominates the common successor. + // TODO: It would be better to check CommonSucc != CurLoop->getHeader(), + // but doing the check this way also avoids certain kinds of control flow + // (e.g. three-way conditions), that getHoistedBlock doesn't currently + // handle correctly. + if (DT->dominates(BI, CommonSucc)) + HoistableBranches[BI] = CommonSucc; + } + } + + bool canHoistPHI(PHINode *PN) { + // We can hoist phis if the block they are in is the target of hoistable + // branches which cover all of the predecessors of the block. + SmallPtrSet PredecessorBlocks; + BasicBlock *BB = PN->getParent(); + for (BasicBlock *PredBB : predecessors(BB)) + PredecessorBlocks.insert(PredBB); + for (auto &Pair : HoistableBranches) { + if (Pair.second == BB) { + // Which blocks are predecessors via this branch depends on if the + // branch is triangle-like or diamond-like. + if (Pair.first->getSuccessor(0) == BB) { + PredecessorBlocks.erase(Pair.first->getParent()); + PredecessorBlocks.erase(Pair.first->getSuccessor(1)); + } else if (Pair.first->getSuccessor(1) == BB) { + PredecessorBlocks.erase(Pair.first->getParent()); + PredecessorBlocks.erase(Pair.first->getSuccessor(0)); + } else { + PredecessorBlocks.erase(Pair.first->getSuccessor(0)); + PredecessorBlocks.erase(Pair.first->getSuccessor(1)); + } + } + } + // PredecessorBlocks will now be empty if for every predecessor of BB we + // found a hoistable branch source. + return PredecessorBlocks.empty(); + } + + BasicBlock *getHoistedBlock(BasicBlock *BB) { + // If BB has already been hoisted, return that + if (HoistDestinationMap.count(BB)) + return HoistDestinationMap[BB]; + + // Check if this block is conditional based on a pending branch + BranchInst *BI = nullptr; + for (auto Pair : HoistableBranches) { + if ((Pair.first->getSuccessor(0) == BB && BB != Pair.second) || + (Pair.first->getSuccessor(1) == BB && BB != Pair.second)) { + assert(!BI && "BB is expected to be the target of at most one branch"); + BI = Pair.first; + } + } + + // If not involved in a pending branch, hoist to preheader + BasicBlock *InitialPreheader = CurLoop->getLoopPreheader(); + if (!BI) { + LLVM_DEBUG(dbgs() << "LICM using " << InitialPreheader->getName() + << " as hoist destination for " << BB->getName() + << "\n"); + HoistDestinationMap[BB] = InitialPreheader; + return InitialPreheader; + } + + LLVMContext &C = BB->getContext(); + BasicBlock *TrueDest = BI->getSuccessor(0); + BasicBlock *FalseDest = BI->getSuccessor(1); + BasicBlock *CommonSucc = HoistableBranches[BI]; + BasicBlock *HoistTarget = getHoistedBlock(BI->getParent()); + + // Create hoisted versions of blocks that currently don't have them + auto CreateHoistedBlock = [&](BasicBlock *Orig) { + if (HoistDestinationMap[Orig]) + return HoistDestinationMap[Orig]; + BasicBlock *New = + BasicBlock::Create(C, Orig->getName() + ".licm", Orig->getParent()); + HoistDestinationMap[Orig] = New; + DT->addNewBlock(New, HoistTarget); + if (CurLoop->getParentLoop()) + CurLoop->getParentLoop()->addBasicBlockToLoop(New, *LI); + LLVM_DEBUG(dbgs() << "LICM created " << New->getName() + << " as hoist destination for " << Orig->getName() + << "\n"); + return New; + }; + BasicBlock *HoistTrueDest = CreateHoistedBlock(TrueDest); + BasicBlock *HoistFalseDest = CreateHoistedBlock(FalseDest); + BasicBlock *HoistCommonSucc = CreateHoistedBlock(CommonSucc); + + // Link up these blocks with branches + if (!HoistCommonSucc->getTerminator()) { + // The new common successor we've generated will branch to whatever that + // hoist target branched to. + BasicBlock *TargetSucc = HoistTarget->getSingleSuccessor(); + assert(TargetSucc && "Expected hoist target to have a single successor"); + HoistCommonSucc->moveBefore(TargetSucc); + BranchInst::Create(TargetSucc, HoistCommonSucc); + } + if (!HoistTrueDest->getTerminator()) { + HoistTrueDest->moveBefore(HoistCommonSucc); + BranchInst::Create(HoistCommonSucc, HoistTrueDest); + } + if (!HoistFalseDest->getTerminator()) { + HoistFalseDest->moveBefore(HoistCommonSucc); + BranchInst::Create(HoistCommonSucc, HoistFalseDest); + } + + // If BI is being hoisted to what was originally the preheader then + // HoistCommonSucc will now be the new preheader. + if (HoistTarget == InitialPreheader) { + // Phis in the loop header now need to use the new preheader + InitialPreheader->replaceSuccessorsPhiUsesWith(HoistCommonSucc); + // The new preheader dominates what the original preheader dominated. + DomTreeNode *PreheaderNode = DT->getNode(HoistCommonSucc); + for (DomTreeNode *Child : OriginalPreheaderChildren) + DT->changeImmediateDominator(Child, PreheaderNode); + // The preheader hoist destination is now the new preheader, with the + // exception of the hoist destination of this branch. + for (auto &Pair : HoistDestinationMap) + if (Pair.second == InitialPreheader && Pair.first != BI->getParent()) + Pair.second = HoistCommonSucc; + } + + // Now finally hoist BI + ReplaceInstWithInst( + HoistTarget->getTerminator(), + BranchInst::Create(HoistTrueDest, HoistFalseDest, BI->getCondition())); + + assert(CurLoop->getLoopPreheader() && + "Hoisting blocks should not have destroyed preheader"); + return HoistDestinationMap[BB]; + } +}; + /// Walk the specified region of the CFG (defined by all blocks dominated by /// the specified block, and that are in the current loop) in depth first /// order w.r.t the DominatorTree. This allows us to visit definitions before @@ -447,13 +641,39 @@ CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr && "Unexpected input to hoistRegion"); + ControlFlowHoister CFH(LI, DT, CurLoop); + + // Keep track of instructions that have been hoisted, as they may need to be + // re-hoisted if they end up not dominating all of their uses. + std::list HoistedInstructions; + + // Record what the original preheader is, as we'll need it later if we need to + // re-hoist instructions. + BasicBlock *OriginalPreheader = CurLoop->getLoopPreheader(); + // We want to visit parents before children. We will enque all the parents // before their children in the worklist and process the worklist in order. SmallVector Worklist = collectChildrenInLoop(N, CurLoop); bool Changed = false; - for (DomTreeNode *DTN : Worklist) { - BasicBlock *BB = DTN->getBlock(); + while (Worklist.size() > 0) { + // For PHI hoisting to work we need to hoist blocks before their successors. + // Do this by picking the first block that has no pending predecessors. + unsigned int idx = 0; + for (unsigned int i = 0; i < Worklist.size(); ++i) { + auto IsPredecessorOfWorklistI = [&](DomTreeNode *P) { + BasicBlock *BB1 = Worklist[i]->getBlock(); + BasicBlock *BB2 = P->getBlock(); + return BB1 != BB2 && llvm::is_contained(predecessors(BB1), BB2); + }; + if (!llvm::any_of(Worklist, IsPredecessorOfWorklistI)) { + idx = i; + break; + } + } + BasicBlock *BB = Worklist[idx]->getBlock(); + Worklist.erase(Worklist.begin() + idx); + // Only need to process the contents of this block if it is not part of a // subloop (which would already have been processed). if (inSubLoop(BB, CurLoop, LI)) @@ -491,14 +711,17 @@ // Try hoisting the instruction out to the preheader. We can only do // this if all of the operands of the instruction are loop invariant and // if it is safe to hoist the instruction. - // + // TODO: It may be safe to hoist if we are hoisting to a conditional block + // and we have accurately duplicated the control flow from the loop header + // to that block. if (CurLoop->hasLoopInvariantOperands(&I) && canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, true, ORE) && (IsMustExecute || isSafeToExecuteUnconditionally( I, DT, CurLoop, SafetyInfo, ORE, CurLoop->getLoopPreheader()->getTerminator()))) { - hoist(I, DT, CurLoop, SafetyInfo, ORE); + hoist(I, DT, CurLoop, CFH.getHoistedBlock(BB), SafetyInfo, ORE); + HoistedInstructions.push_front(&I); Changed = true; continue; } @@ -521,7 +744,9 @@ I.replaceAllUsesWith(Product); I.eraseFromParent(); - hoist(*ReciprocalDivisor, DT, CurLoop, SafetyInfo, ORE); + hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getHoistedBlock(BB), + SafetyInfo, ORE); + HoistedInstructions.push_front(ReciprocalDivisor); Changed = true; continue; } @@ -532,11 +757,31 @@ isGuard(&I)) && IsMustExecute && IsMemoryNotModified && CurLoop->hasLoopInvariantOperands(&I)) { - hoist(I, DT, CurLoop, SafetyInfo, ORE); + hoist(I, DT, CurLoop, CFH.getHoistedBlock(BB), SafetyInfo, ORE); + HoistedInstructions.push_front(&I); Changed = true; continue; } + if (PHINode *PN = dyn_cast(&I)) { + if (CurLoop->hasLoopInvariantOperands(PN) && CFH.canHoistPHI(PN)) { + // Redirect incoming blocks first to ensure that we create hoisted + // versions of those blocks before we hoist the phi. + for (unsigned int i = 0; i < PN->getNumIncomingValues(); ++i) + PN->setIncomingBlock(i, + CFH.getHoistedBlock(PN->getIncomingBlock(i))); + hoist(*PN, DT, CurLoop, CFH.getHoistedBlock(BB), SafetyInfo, ORE); + assert(DT->dominates(PN, BB) && "Conditional PHIs not expected"); + Changed = true; + continue; + } + } + + // Remember possibly hoistable branches so we can actually hoist them + // later if needed. + if (BranchInst *BI = dyn_cast(&I)) + CFH.registerPossiblyHoistableBranch(BI); + if (IsMustExecute) IsMustExecute = isGuaranteedToTransferExecutionToSuccessor(&I); if (IsMemoryNotModified) @@ -544,6 +789,22 @@ } } + // If we hoisted instructions to a conditional block they may not dominate + // their uses that weren't hoisted. If so make them unconditional by moving + // them to the end of the original preheader, which is guaranteed to dominate + // everything in the loop. Instructions are pushed to the front of the list, + // so we iterate through them in first-in-last-out order which ensures that + // when we hoist an instruction we hoist its operands. + Instruction *HoistPoint = OriginalPreheader->getTerminator(); + for (Instruction *I : HoistedInstructions) { + if (!llvm::all_of(I->uses(), [&](Use &U) { return DT->dominates(I, U); })) { + LLVM_DEBUG(dbgs() << "LICM rehoisting to " << OriginalPreheader->getName() + << ": " << *I << "\n"); + I->moveBefore(HoistPoint); + HoistPoint = I; + } + } + return Changed; } @@ -1099,8 +1360,8 @@ /// is safe to hoist, this instruction is called to do the dirty work. /// static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, - LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) { - auto *Preheader = CurLoop->getLoopPreheader(); + BasicBlock *Preheader, LoopSafetyInfo *SafetyInfo, + OptimizationRemarkEmitter *ORE) { LLVM_DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": " << I << "\n"); ORE->emit([&]() { @@ -1119,8 +1380,13 @@ !isGuaranteedToExecute(I, DT, CurLoop, SafetyInfo)) I.dropUnknownNonDebugMetadata(); - // Move the new node to the Preheader, before its terminator. - I.moveBefore(Preheader->getTerminator()); + if (isa(I)) { + // Move the new node to the end of the phi list in the preheader. + I.moveBefore(Preheader->getFirstNonPHI()); + } else { + // Move the new node to the Preheader, before its terminator. + I.moveBefore(Preheader->getTerminator()); + } // Do not retain debug locations when we are moving instructions to different // basic blocks, because we want to avoid jumpy line tables. Calls, however, Index: test/Transforms/LICM/hoist-phi.ll =================================================================== --- /dev/null +++ test/Transforms/LICM/hoist-phi.ll @@ -0,0 +1,873 @@ +; RUN: opt -S -licm < %s | FileCheck %s +; RUN: opt -passes='require,loop(licm)' -S < %s | FileCheck %s + +; CHECK-LABEL: @triangle_phi +define void @triangle_phi(i32 %x, i32* %p) { +; CHECK-LABEL: entry: +; CHECK: %cmp1 = icmp sgt i32 %x, 0 +; CHECK: br i1 %cmp1, label %[[IF_LICM:.*]], label %[[THEN_LICM:.*]] +entry: + br label %loop + +; CHECK: [[IF_LICM]]: +; CHECK: %add = add i32 %x, 1 +; CHECK: br label %[[THEN_LICM]] + +; CHECK: [[THEN_LICM]]: +; CHECK: phi i32 [ %add, %[[IF_LICM]] ], [ %x, %entry ] +; CHECK: store i32 %phi, i32* %p +; CHECK: %cmp2 = icmp ne i32 %phi, 0 +; CHECK: br label %loop + +loop: + %cmp1 = icmp sgt i32 %x, 0 + br i1 %cmp1, label %if, label %then + +if: + %add = add i32 %x, 1 + br label %then + +then: + %phi = phi i32 [ %add, %if ], [ %x, %loop ] + store i32 %phi, i32* %p + %cmp2 = icmp ne i32 %phi, 0 + br i1 %cmp2, label %loop, label %end + +end: + ret void +} + +; CHECK-LABEL: @diamond_phi +define void @diamond_phi(i32 %x, i32* %p) { +; CHECK-LABEL: entry: +; CHECK: %cmp1 = icmp sgt i32 %x, 0 +; CHECK: br i1 %cmp1, label %[[IF_LICM:.*]], label %[[ELSE_LICM:.*]] +entry: + br label %loop + +; CHECK: [[IF_LICM]]: +; CHECK: %add = add i32 %x, 1 +; CHECK: br label %[[THEN_LICM:.*]] + +; CHECK: [[ELSE_LICM]]: +; CHECK: %sub = sub i32 %x, 1 +; CHECK: br label %[[THEN_LICM]] + +; CHECK: [[THEN_LICM]] +; CHECK: %phi = phi i32 [ %add, %[[IF_LICM]] ], [ %sub, %[[ELSE_LICM]] ] +; CHECK: store i32 %phi, i32* %p +; CHECK: %cmp2 = icmp ne i32 %phi, 0 +; CHECK: br label %loop + +loop: + %cmp1 = icmp sgt i32 %x, 0 + br i1 %cmp1, label %if, label %else + +if: + %add = add i32 %x, 1 + br label %then + +else: + %sub = sub i32 %x, 1 + br label %then + +then: + %phi = phi i32 [ %add, %if ], [ %sub, %else ] + store i32 %phi, i32* %p + %cmp2 = icmp ne i32 %phi, 0 + br i1 %cmp2, label %loop, label %end + +end: + ret void +} + +; This is currently too complicated for us to be able to hoist the phi. +; CHECK-LABEL: @three_way_phi +define void @three_way_phi(i32 %x, i32* %p) { +; CHECK-LABEL: entry: +; CHECK: %cmp1 = icmp sgt i32 %x, 0 +; CHECK: %add = add i32 %x, 1 +; CHECK: %cmp2 = icmp sgt i32 %add, 0 +; CHECK: br i1 %cmp1, label %[[IF_LICM:.*]], label %[[ELSE_LICM:.*]] + +; CHECK: [[IF_LICM]]: +; CHECK: br label %[[THEN_LICM:.*]] + +; CHECK: [[THEN_LICM]]: +; CHECK: %sub = sub i32 %x, 1 +; CHECK: br label %loop + +entry: + br label %loop + +loop: + %cmp1 = icmp sgt i32 %x, 0 + br i1 %cmp1, label %if, label %then + +if: + %add = add i32 %x, 1 + %cmp2 = icmp sgt i32 %add, 0 + br i1 %cmp2, label %if.if, label %then + +if.if: + %sub = sub i32 %x, 1 + br label %then + +then: + %phi = phi i32 [ 0, %loop ], [ %add, %if ], [ %sub, %if.if ] + store i32 %phi, i32* %p + %cmp3 = icmp ne i32 %phi, 0 + br i1 %cmp3, label %loop, label %end + +end: + ret void +} + +; This is currently too complicated for us to be able to hoist the phi. +; CHECK-LABEL: @tree_phi +define void @tree_phi(i32 %x, i32* %p) { +; CHECK-LABEL: entry: +; CHECK: %cmp1 = icmp sgt i32 %x, 0 +; CHECK: %add = add i32 %x, 1 +; CHECK: %cmp2 = icmp sgt i32 %add, 0 +; CHECK: %sub = sub i32 %x, 1 +; CHECK: br label %loop + +entry: + br label %loop + +loop: + %cmp1 = icmp sgt i32 %x, 0 + br i1 %cmp1, label %if, label %else + +if: + %add = add i32 %x, 1 + %cmp2 = icmp sgt i32 %add, 0 + br i1 %cmp2, label %if.if, label %if.else + +if.if: + br label %then + +if.else: + br label %then + +else: + %sub = sub i32 %x, 1 + br label %then + +then: + %phi = phi i32 [ %add, %if.if ], [ 0, %if.else ], [ %sub, %else ] + store i32 %phi, i32* %p + %cmp3 = icmp ne i32 %phi, 0 + br i1 %cmp3, label %loop, label %end + +end: + ret void +} + +; We can hoist the first phi, but not the second. +; CHECK-LABEL: @phi_phi +define void @phi_phi(i32 %x, i32* %p) { +; CHECK-LABEL: entry: +; CHECK: %cmp1 = icmp sgt i32 %x, 0 +; CHECK: %add = add i32 %x, 1 +; CHECK: %cmp2 = icmp sgt i32 %add, 0 +; CHECK: %sub = sub i32 %x, 1 +; CHECK: br i1 %cmp2, label %[[IF_IF_LICM:.*]], label %[[IF_ELSE_LICM:.*]] + +; CHECK: [[IF_IF_LICM]]: +; CHECK: br label %[[IF_THEN_LICM:.*]] + +; CHECK: [[IF_ELSE_LICM]]: +; CHECK: br label %[[IF_THEN_LICM]] + +; CHECK: [[IF_THEN_LICM]]: +; CHECK: %phi1 = phi i32 [ %add, %[[IF_IF_LICM]] ], [ 0, %[[IF_ELSE_LICM]] ] +; CHECK: br label %loop + +entry: + br label %loop + +loop: + %cmp1 = icmp sgt i32 %x, 0 + br i1 %cmp1, label %if, label %else + +if: + %add = add i32 %x, 1 + %cmp2 = icmp sgt i32 %add, 0 + br i1 %cmp2, label %if.if, label %if.else + +if.if: + br label %if.then + +if.else: + br label %if.then + +if.then: + %phi1 = phi i32 [ %add, %if.if ], [ 0, %if.else ] + br label %then + +else: + %sub = sub i32 %x, 1 + br label %then + +then: + %phi2 = phi i32 [ %phi1, %if.then ], [ %sub, %else ] + store i32 %phi2, i32* %p + %cmp3 = icmp ne i32 %phi2, 0 + br i1 %cmp3, label %loop, label %end + +end: + ret void +} + +; Check that we correctly duplicate empty control flow. +; CHECK-LABEL: @empty_triangle_phi +define i8 @empty_triangle_phi(i32 %x, i32 %y) { +; CHECK-LABEL: entry: +; CHECK: %cmp1 = icmp eq i32 %x, 0 +; CHECK: br i1 %cmp1, label %[[IF_LICM:.*]], label %[[THEN_LICM:.*]] +entry: + br label %loop + +; CHECK: [[IF_LICM]]: +; CHECK: br label %[[THEN_LICM]] + +; CHECK: [[THEN_LICM]]: +; CHECK: %phi = phi i8 [ 0, %[[IF_LICM]] ], [ 1, %entry ] +; CHECK: %cmp2 = icmp eq i32 %y, 0 +; CHECK: br label %loop + +loop: + %cmp1 = icmp eq i32 %x, 0 + br i1 %cmp1, label %if, label %then + +if: + br label %then + +then: + %phi = phi i8 [ 0, %if ], [ 1, %loop ] + %cmp2 = icmp eq i32 %y, 0 + br i1 %cmp2, label %end, label %loop + +end: + ret i8 %phi +} + +; CHECK-LABEL: @empty_diamond_phi +define i8 @empty_diamond_phi(i32 %x, i32 %y) { +; CHECK-LABEL: entry: +; CHECK: %cmp1 = icmp eq i32 %x, 0 +; CHECK: br i1 %cmp1, label %[[IF_LICM:.*]], label %[[ELSE_LICM:.*]] +entry: + br label %loop + +; CHECK: [[IF_LICM]]: +; CHECK: br label %[[THEN_LICM:.*]] + +; CHECK: [[ELSE_LICM]]: +; CHECK: br label %[[THEN_LICM]] + +; CHECK: [[THEN_LICM]]: +; CHECK: %phi = phi i8 [ 0, %[[IF_LICM]] ], [ 1, %[[ELSE_LICM]] ] +; CHECK: %cmp2 = icmp eq i32 %y, 0 +; CHECK: br label %loop + +loop: + %cmp1 = icmp eq i32 %x, 0 + br i1 %cmp1, label %if, label %else + +if: + br label %then + +else: + br label %then + +then: + %phi = phi i8 [ 0, %if ], [ 1, %else ] + %cmp2 = icmp eq i32 %y, 0 + br i1 %cmp2, label %end, label %loop + +end: + ret i8 %phi +} + +; Check that we correctly handle the case that the first thing we try to hoist is a phi. +; CHECK-LABEL: @empty_triangle_phi_first +define i8 @empty_triangle_phi_first(i32 %x, i1 %cond) { +; CHECK-LABEL: entry: +; CHECK: br i1 %cond, label %[[IF_LICM:.*]], label %[[THEN_LICM:.*]] +entry: + br label %loop + +; CHECK: [[IF_LICM]]: +; CHECK: br label %[[THEN_LICM]] + +; CHECK: [[THEN_LICM]]: +; CHECK: %phi = phi i8 [ 0, %[[IF_LICM]] ], [ 1, %entry ] +; CHECK: %cmp = icmp eq i32 %x, 0 +; CHECK: br label %loop + +loop: + br i1 %cond, label %if, label %then + +if: + br label %then + +then: + %phi = phi i8 [ 0, %if ], [ 1, %loop ] + %cmp = icmp eq i32 %x, 0 + br i1 %cmp, label %end, label %loop + +end: + ret i8 %phi +} + +; CHECK-LABEL: @empty_diamond_phi +define i8 @empty_diamond_phi_first(i32 %x, i1 %cond) { +; CHECK-LABEL: entry: +; CHECK: br i1 %cond, label %[[IF_LICM:.*]], label %[[ELSE_LICM:.*]] +entry: + br label %loop + +; CHECK: [[IF_LICM]]: +; CHECK: br label %[[THEN_LICM:.*]] + +; CHECK: [[ELSE_LICM]]: +; CHECK: br label %[[THEN_LICM]] + +; CHECK: [[THEN_LICM]]: +; CHECK: %phi = phi i8 [ 0, %[[IF_LICM]] ], [ 1, %[[ELSE_LICM]] ] +; CHECK: %cmp = icmp eq i32 %x, 0 +; CHECK: br label %loop + +loop: + br i1 %cond, label %if, label %else + +if: + br label %then + +else: + br label %then + +then: + %phi = phi i8 [ 0, %if ], [ 1, %else ] + %cmp = icmp eq i32 %x, 0 + br i1 %cmp, label %end, label %loop + +end: + ret i8 %phi +} + +; CHECK-LABEL: @empty_triangle_phi_first +define i8 @empty_triangle_phi_first_empty_loop_head(i32 %x, i1 %cond) { +; CHECK-LABEL: entry: +; CHECK: br i1 %cond, label %[[IF_LICM:.*]], label %[[THEN_LICM:.*]] +entry: + br label %loop + +; CHECK: [[IF_LICM]]: +; CHECK: br label %[[THEN_LICM]] + +; CHECK: [[THEN_LICM]]: +; CHECK: %phi = phi i8 [ 0, %[[IF_LICM]] ], [ 1, %entry ] +; CHECK: %cmp = icmp eq i32 %x, 0 +; CHECK: br label %loop + +loop: + br label %test + +test: + br i1 %cond, label %if, label %then + +if: + br label %then + +then: + %phi = phi i8 [ 0, %if ], [ 1, %test ] + %cmp = icmp eq i32 %x, 0 + br i1 %cmp, label %end, label %loop + +end: + ret i8 %phi +} + +; CHECK-LABEL: @empty_diamond_phi_first_empty_loop_head +define i8 @empty_diamond_phi_first_empty_loop_head(i32 %x, i1 %cond) { +; CHECK-LABEL: entry: +; CHECK: br i1 %cond, label %[[IF_LICM:.*]], label %[[ELSE_LICM:.*]] +entry: + br label %loop + +; CHECK: [[IF_LICM]]: +; CHECK: br label %[[THEN_LICM:.*]] + +; CHECK: [[ELSE_LICM]]: +; CHECK: br label %[[THEN_LICM]] + +; CHECK: [[THEN_LICM]]: +; CHECK: %phi = phi i8 [ 0, %[[IF_LICM]] ], [ 1, %[[ELSE_LICM]] ] +; CHECK: %cmp = icmp eq i32 %x, 0 +; CHECK: br label %loop + +loop: + br label %test + +test: + br i1 %cond, label %if, label %else + +if: + br label %then + +else: + br label %then + +then: + %phi = phi i8 [ 0, %if ], [ 1, %else ] + %cmp = icmp eq i32 %x, 0 + br i1 %cmp, label %end, label %loop + +end: + ret i8 %phi +} + +; The phi is on one branch of a diamond while simultaneously at the end of a +; triangle. Check that we duplicate the triangle and not the diamond. +; CHECK-LABEL: @triangle_diamond +define void @triangle_diamond(i32* %ptr, i32 %x, i32 %y) { +; CHECK-LABEL: entry: +; CHECK: %cmp1 = icmp ne i32 %x, 0 +; CHECK: %cmp2 = icmp ne i32 %y, 0 +; CHECK: br i1 %cmp1, label %[[IF_LICM:.*]], label %[[THEN_LICM:.*]] +entry: + br label %loop + +; CHECK: [[IF_LICM]]: +; CHECK: br label %[[THEN_LICM]] + +; CHECK: [[THEN_LICM]]: +; CHECK: %phi = phi i32 [ 0, %[[IF_LICM]] ], [ 127, %entry ] + +loop: + %cmp1 = icmp ne i32 %x, 0 + br i1 %cmp1, label %if, label %then + +if: + %cmp2 = icmp ne i32 %y, 0 + br i1 %cmp2, label %if.then, label %then + +then: + %phi = phi i32 [ 0, %if ], [ 127, %loop ] + store i32 %phi, i32* %ptr + br label %end + +if.then: + br label %end + +end: + br label %loop +} + +; As the previous, but the end of the diamond is the head of the loop. +; CHECK-LABEL: @triangle_diamond_backedge +define void @triangle_diamond_backedge(i32* %ptr, i32 %x, i32 %y) { +; CHECK-LABEL: entry: +; CHECK: %cmp1 = icmp ne i32 %x, 0 +; CHECK: %cmp2 = icmp ne i32 %y, 0 +; CHECK: br i1 %cmp1, label %[[IF_LICM:.*]], label %[[THEN_LICM:.*]] +entry: + br label %loop + +; CHECK: [[IF_LICM]]: +; CHECK: br label %[[THEN_LICM]] + +; CHECK: [[THEN_LICM]]: +; CHECK: %phi = phi i32 [ 0, %[[IF_LICM]] ], [ 127, %entry ] + +loop: + %cmp1 = icmp ne i32 %x, 0 + br i1 %cmp1, label %if, label %then + +if: + %cmp2 = icmp ne i32 %y, 0 + br i1 %cmp2, label %backedge, label %then + +then: + %phi = phi i32 [ 0, %if ], [ 127, %loop ] + store i32 %phi, i32* %ptr + br label %loop + +backedge: + br label %loop +} + +; The inner diamonds can be hoisted, but not currently the outer diamond +; CHECK-LABEL: @diamonds_inside_diamond +define void @diamonds_inside_diamond(i32 %x, i32* %p) { +; CHECK-LABEL: entry: +; CHECK: %cmp1 = icmp sgt i32 %x, 0 +; CHECK: %cmp2 = icmp sgt i32 %x, 10 +; CHECK: %cmp3 = icmp slt i32 %x, -10 +; CHECK: br i1 %cmp2, label %[[IF_IF_LICM:.*]], label %[[IF_ELSE_LICM:.*]] +entry: + br label %loop + +; CHECK: [[IF_IF_LICM]]: +; CHECK: br label %[[IF_THEN_LICM:.*]] + +; CHECK: [[IF_ELSE_LICM]]: +; CHECK: br label %[[IF_THEN_LICM]] + +; CHECK: [[IF_THEN_LICM]]: +; CHECK: %phi1 = phi i32 [ 0, %[[IF_IF_LICM]] ], [ 1, %[[IF_ELSE_LICM]] ] +; CHECK: br i1 %cmp3, label %[[ELSE_IF_LICM:.*]], label %[[ELSE_ELSE_LICM:.*]] + +; CHECK: [[ELSE_IF_LICM]]: +; CHECK: br label %[[ELSE_THEN_LICM:.*]] + +; CHECK: [[ELSE_ELSE_LICM]]: +; CHECK: br label %[[ELSE_THEN_LICM]] + +; CHECK: [[ELSE_THEN_LICM]]: +; CHECK: %phi2 = phi i32 [ 2, %[[ELSE_IF_LICM]] ], [ 3, %[[ELSE_ELSE_LICM]] ] +; CHECK: br label %loop + +loop: + %cmp1 = icmp sgt i32 %x, 0 + br i1 %cmp1, label %if, label %else + +if: + %cmp2 = icmp sgt i32 %x, 10 + br i1 %cmp2, label %if.if, label %if.else + +if.if: + br label %if.then + +if.else: + br label %if.then + +if.then: + %phi1 = phi i32 [ 0, %if.if ], [ 1, %if.else ] + br label %then + +else: + %cmp3 = icmp slt i32 %x, -10 + br i1 %cmp3, label %else.if, label %else.else + +else.if: + br label %else.then + +else.else: + br label %else.then + +else.then: + %phi2 = phi i32 [ 2, %else.if ], [ 3, %else.else ] + br label %then + +then: + %phi3 = phi i32 [ %phi1, %if.then ], [ %phi2, %else.then ] + store i32 %phi3, i32* %p + %cmp4 = icmp ne i32 %phi3, 0 + br i1 %cmp4, label %loop, label %end + +end: + ret void +} + +; We can hoist blocks that contain an edge that exits the loop by ignoring that +; edge in the hoisted block. +; CHECK-LABEL: @triangle_phi_loopexit +define void @triangle_phi_loopexit(i32 %x, i32* %p) { +; CHECK-LABEL: entry: +; CHECK: %add = add i32 %x, 1 +; CHECK: %cmp1 = icmp sgt i32 %x, 0 +; CHECK: %cmp2 = icmp sgt i32 10, %add +; CHECK: br i1 %cmp1, label %[[IF_LICM:.*]], label %[[THEN_LICM:.*]] +entry: + br label %loop + +; CHECK: [[IF_LICM]]: +; CHECK: br label %[[THEN_LICM]] + +; CHECK: [[THEN_LICM]]: +; CHECK: %phi = phi i32 [ %add, %[[IF_LICM]] ], [ %x, %entry ] + +loop: + %cmp1 = icmp sgt i32 %x, 0 + br i1 %cmp1, label %if, label %then + +if: + %add = add i32 %x, 1 + %cmp2 = icmp sgt i32 10, %add + br i1 %cmp2, label %then, label %end + +then: + %phi = phi i32 [ %add, %if ], [ %x, %loop ] + store i32 %phi, i32* %p + %cmp3 = icmp ne i32 %phi, 0 + br i1 %cmp3, label %loop, label %end + +end: + ret void +} + +; CHECK-LABEL: @diamond_phi_oneloopexit +define void @diamond_phi_oneloopexit(i32 %x, i32* %p) { +; CHECK-LABEL: entry: +; CHECK: %add = add i32 %x, 1 +; CHECK: %cmp1 = icmp sgt i32 %x, 0 +; CHECK: %cmp2 = icmp sgt i32 10, %add +; CHECK: br i1 %cmp1, label %[[IF_LICM:.*]], label %[[THEN_LICM:.*]] +entry: + br label %loop + +; CHECK: [[IF_LICM]]: +; CHECK: br label %[[THEN_LICM:.*]] + +; CHECK: [[ELSE_LICM]]: +; CHECK: %sub = sub i32 %x, 1 +; CHECK: br label %[[THEN_LICM]] + +; CHECK: [[THEN_LICM]] +; CHECK: %phi = phi i32 [ %add, %[[IF_LICM]] ], [ %sub, %[[ELSE_LICM]] ] +; CHECK: %cmp3 = icmp ne i32 %phi, 0 +; CHECK: br label %loop + +loop: + %cmp1 = icmp sgt i32 %x, 0 + br i1 %cmp1, label %if, label %else + +if: + %add = add i32 %x, 1 + %cmp2 = icmp sgt i32 10, %add + br i1 %cmp2, label %then, label %end + +else: + %sub = sub i32 %x, 1 + br label %then + +then: + %phi = phi i32 [ %add, %if ], [ %sub, %else ] + store i32 %phi, i32* %p + %cmp3 = icmp ne i32 %phi, 0 + br i1 %cmp3, label %loop, label %end + +end: + ret void +} + +; CHECK-LABEL: @diamond_phi_twoloopexit +define void @diamond_phi_twoloopexit(i32 %x, i32* %p) { +; CHECK-LABEL: entry: +; CHECK: %sub = sub i32 %x, 1 +; CHECK: %add = add i32 %x, 1 +; CHECK: %cmp1 = icmp sgt i32 %x, 0 +; CHECK: %cmp2 = icmp sgt i32 10, %add +; CHECK: %cmp3 = icmp sgt i32 10, %sub +; CHECK: br i1 %cmp1, label %[[IF_LICM:.*]], label %[[THEN_LICM:.*]] +entry: + br label %loop + +; CHECK: [[IF_LICM]]: +; CHECK: br label %[[THEN_LICM:.*]] + +; CHECK: [[ELSE_LICM]]: +; CHECK: br label %[[THEN_LICM]] + +; CHECK: [[THEN_LICM]] +; CHECK: %phi = phi i32 [ %add, %[[IF_LICM]] ], [ %sub, %[[ELSE_LICM]] ] +; CHECK: %cmp4 = icmp ne i32 %phi, 0 +; CHECK: br label %loop + +loop: + %cmp1 = icmp sgt i32 %x, 0 + br i1 %cmp1, label %if, label %else + +if: + %add = add i32 %x, 1 + %cmp2 = icmp sgt i32 10, %add + br i1 %cmp2, label %then, label %end + +else: + %sub = sub i32 %x, 1 + %cmp3 = icmp sgt i32 10, %sub + br i1 %cmp3, label %then, label %end + +then: + %phi = phi i32 [ %add, %if ], [ %sub, %else ] + store i32 %phi, i32* %p + %cmp4 = icmp ne i32 %phi, 0 + br i1 %cmp4, label %loop, label %end + +end: + ret void +} + +; The store cannot be hoisted, so add and shr cannot be hoisted into a +; conditional block. +; CHECK-LABEL: @conditional_use +define void @conditional_use(i32 %x, i32* %p) { +; CHECK-LABEL: entry: +; CHECK: %cond = icmp ugt i32 %x, 0 +; CHECK: %add = add i32 %x, 5 +; CHECK: %shr = ashr i32 %add, 1 +; CHECK: br label %loop +entry: + br label %loop + +loop: + %cond = icmp ugt i32 %x, 0 + br i1 %cond, label %if, label %else + +; CHECK-LABEL: if: +; CHECK: store i32 %shr, i32* %p, align 4 +if: + %add = add i32 %x, 5 + %shr = ashr i32 %add, 1 + store i32 %shr, i32* %p, align 4 + br label %then + +else: + br label %then + +then: + br label %loop +} + +; A diamond with two triangles on the left and one on the right. This test is +; to check that we have a unique loop preheader when we hoist the store (and so +; don't fail an assertion). +; CHECK-LABEL: @triangles_in_diamond +define void @triangles_in_diamond(i32* %ptr) { +; CHECK-LABEL: entry: +; CHECK: store i32 0, i32* %ptr, align 4 +; CHECK: br label %loop +entry: + br label %loop + +loop: + br i1 undef, label %left_triangle_1, label %right_triangle + +left_triangle_1: + br i1 undef, label %left_triangle_1_if, label %left_triangle_2 + +left_triangle_1_if: + br label %left_triangle_2 + +left_triangle_2: + br i1 undef, label %left_triangle_2_if, label %left_triangle_2_then + +left_triangle_2_if: + br label %left_triangle_2_then + +left_triangle_2_then: + br label %loop.end + +right_triangle: + br i1 undef, label %right_triangle.if, label %right_triangle.then + +right_triangle.if: + br label %right_triangle.then + +right_triangle.then: + br label %loop.end + +loop.end: + store i32 0, i32* %ptr, align 4 + br label %loop +} + +; %cmp dominates its used after being hoisted, but not after %brmerge is rehoisted +; CHECK-LABEL: @rehoist +define void @rehoist(i8* %this, i32 %x) { +; CHECK-LABEL: entry: +; CHECK: %sub = add nsw i32 %x, -1 +; CHECK: %fptr = bitcast i8* %this to void (i8*)* +; CHECK: %cmp = icmp eq i32 0, %sub +; CHECK: %brmerge = or i1 %cmp, true +entry: + %sub = add nsw i32 %x, -1 + br label %loop + +loop: + br i1 undef, label %if1, label %else1 + +if1: + %fptr = bitcast i8* %this to void (i8*)* + call void %fptr(i8* %this) + br label %then1 + +else1: + br label %then1 + +then1: + %cmp = icmp eq i32 0, %sub + br i1 %cmp, label %end, label %else2 + +else2: + %brmerge = or i1 %cmp, true + br i1 %brmerge, label %if3, label %end + +if3: + br label %end + +end: + br label %loop +} + +; A test case that uses empty blocks in a way that can cause control flow +; hoisting to get confused. +; CHECK-LABEL: @empty_blocks_multiple_conditional_branches +define void @empty_blocks_multiple_conditional_branches(float %arg, float* %ptr) { +; CHECK-LABEL: entry +; CHECK: %div1 = fmul float %arg, 4.000000e+00 +; CHECK: %div2 = fmul float %arg, 2.000000e+00 +entry: + br label %loop + +; The exact path to the phi isn't checked here, because it depends on whether +; cond2 or cond3 is hoisted first +; CHECK: %phi = phi float [ 0.000000e+00, %{{.*}} ], [ %div1, %{{.*}} ] +; CHECK: br label %loop + +loop: + br i1 undef, label %backedge2, label %cond1 + +cond1: + br i1 undef, label %cond1.if, label %cond1.else + +cond1.else: + br label %cond3 + +cond1.if: + br label %cond1.if.next + +cond1.if.next: + br label %cond2 + +cond2: + %div1 = fmul float %arg, 4.000000e+00 + br i1 undef, label %cond2.if, label %cond2.then + +cond2.if: + br label %cond2.then + +cond2.then: + %phi = phi float [ 0.000000e+00, %cond2 ], [ %div1, %cond2.if ] + store float %phi, float* %ptr + br label %backedge2 + +cond3: + br i1 undef, label %cond3.then, label %cond3.if + +cond3.if: + %div2 = fmul float %arg, 2.000000e+00 + store float %div2, float* %ptr + br label %cond3.then + +cond3.then: + br label %loop + +backedge2: + br label %loop +} Index: test/Transforms/LoopVectorize/invariant-store-vectorization.ll =================================================================== --- test/Transforms/LoopVectorize/invariant-store-vectorization.ll +++ test/Transforms/LoopVectorize/invariant-store-vectorization.ll @@ -199,10 +199,14 @@ ; invariant val stored to invariant address predicated on invariant condition ; This is not treated as a predicated store since the block the store belongs to ; is the latch block (which doesn't need to be predicated). -; TODO: We should vectorize this loop once we relax the check for -; variant/invariant values being stored to invariant address. +; This can be vectorized as LICM will hoist storeval. ; CHECK-LABEL: inv_val_store_to_inv_address_conditional_inv -; CHECK-NOT: <4 x +; CHECK-LABEL: vector.body: +; CHECK: store <4 x i32> +; CHECK: store i32 %storeval, i32* %a +; CHECK-NEXT: %index.next = add i64 %index, 4 +; CHECK-NEXT: icmp eq i64 %index.next, %n.vec +; CHECK-NEXT: br i1 define void @inv_val_store_to_inv_address_conditional_inv(i32* %a, i64 %n, i32* %b, i32 %k) { entry: %ntrunc = trunc i64 %n to i32