Index: lib/Transforms/Scalar/LICM.cpp =================================================================== --- lib/Transforms/Scalar/LICM.cpp +++ lib/Transforms/Scalar/LICM.cpp @@ -62,6 +62,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" @@ -93,7 +94,7 @@ static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE); -static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT, +static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, const Loop *CurLoop, AliasSetTracker *CurAST, const LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE); @@ -715,26 +716,6 @@ if (!BlockColors.empty() && BlockColors.find(const_cast(BB))->second.size() != 1) return false; - - // A PHI node where all of the incoming values are this instruction are - // special -- they can just be RAUW'ed with the instruction and thus - // don't require a use in the predecessor. This is a particular important - // special case because it is the pattern found in LCSSA form. - if (isTriviallyReplacablePHI(*PN, I)) { - if (CurLoop->contains(PN)) - return false; - else - continue; - } - - // Otherwise, PHI node uses occur in predecessor blocks if the incoming - // values. Check for such a use being inside the loop. - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (PN->getIncomingValue(i) == &I) - if (CurLoop->contains(PN->getIncomingBlock(i))) - return false; - - continue; } if (CurLoop->contains(UI)) @@ -804,12 +785,93 @@ return New; } +static Instruction *sinkThroughTriviallyReplacablePHI( + PHINode *TPN, Instruction *I, LoopInfo *LI, + SmallDenseMap &SunkCopies, + const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop) { +#ifndef NDEBUG + SmallVector ExitBlocks; + CurLoop->getUniqueExitBlocks(ExitBlocks); + SmallPtrSet ExitBlockSet(ExitBlocks.begin(), + ExitBlocks.end()); +#endif + BasicBlock *ExitBlock = TPN->getParent(); + assert(ExitBlockSet.count(ExitBlock) && + "The LCSSA PHI is not in an exit block!"); + + Instruction *New; + auto It = SunkCopies.find(ExitBlock); + if (It != SunkCopies.end()) + New = It->second; + else + New = SunkCopies[ExitBlock] = + CloneInstructionInExitBlock(*I, *ExitBlock, *TPN, LI, SafetyInfo); + return New; +} + +static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT, + LoopInfo *LI, const Loop *CurLoop) { +#ifndef NDEBUG + SmallVector ExitBlocks; + CurLoop->getUniqueExitBlocks(ExitBlocks); + SmallPtrSet ExitBlockSet(ExitBlocks.begin(), + ExitBlocks.end()); +#endif + + assert(ExitBlockSet.count(PN->getParent()) && + "Expect the PHI is in an exit block."); + + // Split predecessors of the loop exit to make instructions in the loop are + // exposed to exit blocks through trivially replacable PHIs while keeping the + // loop in the canonical form where each predecessor of each exit block should + // be contained within the loop. For example, this will convert the loop below + // from + // + // LB1: + // %v1 = + // br %LE, %LB2 + // LB2: + // %v2 = + // br %LE, %LB1 + // LE: + // %p = phi [%v1, %LB1], [%v2, %LB2] <-- non-trivially replacable + // + // to + // + // LB1: + // %v1 = + // br %LE.split, %LB2 + // LB2: + // %v2 = + // br %LE.split2, %LB1 + // LE.split: + // %p1 = phi [%v1, %LB1] <-- trivially replacable + // br %LE + // LE.split2: + // %p2 = phi [%v2, %LB2] <-- trivially replacable + // br %LE + // LE: + // %p = phi [%p1, %LE.split], [%p2, %LE.split2] + // + unsigned i = 0, e = PN->getNumIncomingValues(); + while (i != e) { + if (CurLoop->contains(PN->getIncomingBlock(i))) { + SplitBlockPredecessors(PN->getParent(), PN->getIncomingBlock(i), + ".split.loop.exit", DT, LI, true); + i = 0; + e = PN->getNumIncomingValues(); + continue; + } + ++i; + } +} + /// When an instruction is found to only be used outside of the loop, this /// function moves it to the exit blocks and patches up SSA form as needed. /// This method is guaranteed to remove the original instruction from its /// position, and may either delete it or move it to outside of the loop. /// -static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT, +static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, const Loop *CurLoop, AliasSetTracker *CurAST, const LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) { @@ -857,20 +919,17 @@ continue; } - BasicBlock *ExitBlock = PN->getParent(); - assert(ExitBlockSet.count(ExitBlock) && - "The LCSSA PHI is not in an exit block!"); - - Instruction *New; - auto It = SunkCopies.find(ExitBlock); - if (It != SunkCopies.end()) - New = It->second; - else - New = SunkCopies[ExitBlock] = - CloneInstructionInExitBlock(I, *ExitBlock, *PN, LI, SafetyInfo); - - PN->replaceAllUsesWith(New); - PN->eraseFromParent(); + if (isTriviallyReplacablePHI(*PN, I)) { + Instruction *New = sinkThroughTriviallyReplacablePHI( + PN, &I, LI, SunkCopies, SafetyInfo, CurLoop); + PN->replaceAllUsesWith(New); + PN->eraseFromParent(); + } else { + // Split predecessors of the PHI so that we can sink the instruction + // through a trivially replacable PHIs. + splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop); + continue; + } } CurAST->deleteValue(&I); Index: test/Transforms/LICM/sinking.ll =================================================================== --- test/Transforms/LICM/sinking.ll +++ test/Transforms/LICM/sinking.ll @@ -392,6 +392,48 @@ indirectbr i8* undef, [label %lab21, label %lab19] } +; Check if LICM can sink a sinkable instruction to the exit blocks through +; a non-trivially replacable PHI node. +define i32 @test14(i32 %N, i32 %N2, i1 %C) { +; CHECK-LABEL: @test14 +Entry: + br label %Loop + +; CHECK-LABEL: Loop: +; CHECK-NOT: mul +; CHECK-NOT: sub +Loop: + %N_addr.0.pn = phi i32 [ %dec, %ContLoop ], [ %N, %Entry ] + %tmp.6 = mul i32 %N, %N_addr.0.pn + %tmp.7 = sub i32 %tmp.6, %N + %tmp.8 = sub i32 %tmp.6, %N2 + %dec = add i32 %N_addr.0.pn, -1 + br i1 %C, label %ContLoop, label %Out12 + +ContLoop: + %tmp.1 = icmp ne i32 %N_addr.0.pn, 1 + br i1 %tmp.1, label %Loop, label %Out12 + +; CHECK-LABEL: Out12.split.loop.exit: +; CHECK: %[[LCSSAPHI:.*]] = phi i32 [ %N_addr.0.pn, %ContLoop ] +; CHECK: %[[MUL:.*]] = mul i32 %N, %[[LCSSAPHI]] +; CHECK: %[[SUB:.*]] = sub i32 %[[MUL]], %N2 +; CHECK: br label %Out12 +; +; CHECK-LABEL: Out12.split.loop.exit1: +; CHECK: %[[LCSSAPHI2:.*]] = phi i32 [ %N_addr.0.pn, %Loop ] +; CHECK: %[[MUL2:.*]] = mul i32 %N, %[[LCSSAPHI2]] +; CHECK: %[[SUB2:.*]] = sub i32 %[[MUL2]], %N +; CHECK: br label %Out12 +; +; CHECK-LABEL: Out12: +; CHECK: phi i32 [ %[[SUB]], %Out12.split.loop.exit ], [ %[[SUB2]], %Out12.split.loop.exit1 ] + +Out12: + %nonTrivialPhi = phi i32 [%tmp.8, %ContLoop], [%tmp.7, %Loop] + ret i32 %nonTrivialPhi +} + declare void @f(i32*) declare void @g()