Index: lib/Transforms/Scalar/LICM.cpp =================================================================== --- lib/Transforms/Scalar/LICM.cpp +++ lib/Transforms/Scalar/LICM.cpp @@ -550,6 +550,9 @@ SmallPtrSet ExitBlockSet(ExitBlocks.begin(), ExitBlocks.end()); #endif + // Clones of this instruction. Don't create more than one per exit block! + SmallDenseMap SunkCopies; + // If this instruction is only used outside of the loop, then all users are // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of // the instruction. @@ -561,30 +564,37 @@ assert(ExitBlockSet.count(ExitBlock) && "The LCSSA PHI is not in an exit block!"); - Instruction *New = I.clone(); - ExitBlock->getInstList().insert(ExitBlock->getFirstInsertionPt(), New); - if (!I.getName().empty()) - New->setName(I.getName() + ".le"); - - // Build LCSSA PHI nodes for any in-loop operands. Note that this is - // particularly cheap because we can rip off the PHI node that we're - // replacing for the number and blocks of the predecessors. - // OPT: If this shows up in a profile, we can instead finish sinking all - // invariant instructions, and then walk their operands to re-establish - // LCSSA. That will eliminate creating PHI nodes just to nuke them when - // sinking bottom-up. - for (User::op_iterator OI = New->op_begin(), OE = New->op_end(); OI != OE; - ++OI) - if (Instruction *OInst = dyn_cast(*OI)) - if (Loop *OLoop = LI->getLoopFor(OInst->getParent())) - if (!OLoop->contains(PN)) { - PHINode *OpPN = PHINode::Create( - OInst->getType(), PN->getNumIncomingValues(), - OInst->getName() + ".lcssa", ExitBlock->begin()); - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - OpPN->addIncoming(OInst, PN->getIncomingBlock(i)); - *OI = OpPN; - } + Instruction *New; + auto It = SunkCopies.find(ExitBlock); + if (It != SunkCopies.end()) { + New = It->second; + } else { + New = I.clone(); + SunkCopies[ExitBlock] = New; + ExitBlock->getInstList().insert(ExitBlock->getFirstInsertionPt(), New); + if (!I.getName().empty()) + New->setName(I.getName() + ".le"); + + // Build LCSSA PHI nodes for any in-loop operands. Note that this is + // particularly cheap because we can rip off the PHI node that we're + // replacing for the number and blocks of the predecessors. + // OPT: If this shows up in a profile, we can instead finish sinking all + // invariant instructions, and then walk their operands to re-establish + // LCSSA. That will eliminate creating PHI nodes just to nuke them when + // sinking bottom-up. + for (User::op_iterator OI = New->op_begin(), OE = New->op_end(); OI != OE; + ++OI) + if (Instruction *OInst = dyn_cast(*OI)) + if (Loop *OLoop = LI->getLoopFor(OInst->getParent())) + if (!OLoop->contains(PN)) { + PHINode *OpPN = PHINode::Create( + OInst->getType(), PN->getNumIncomingValues(), + OInst->getName() + ".lcssa", ExitBlock->begin()); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + OpPN->addIncoming(OInst, PN->getIncomingBlock(i)); + *OI = OpPN; + } + } PN->replaceAllUsesWith(New); PN->eraseFromParent(); Index: test/Transforms/LICM/extra-copies.ll =================================================================== --- test/Transforms/LICM/extra-copies.ll +++ test/Transforms/LICM/extra-copies.ll @@ -0,0 +1,29 @@ +; RUN: opt < %s -licm -S | FileCheck %s +; PR19835 +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @f(i32 %x) { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %storemerge4 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %mul = mul nsw i32 %x, %x + %add2 = add nsw i32 %mul, %x + %mul3 = add nsw i32 %add2, %mul + %inc = add nsw i32 %storemerge4, 1 + %cmp = icmp slt i32 %inc, 100 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + %a9.0.lcssa = phi i32 [ %mul3, %for.body ] + ret i32 %a9.0.lcssa +} + +; Test that there is exactly one copy of mul nsw i32 %x, %x in the exit block. +; CHECK: define i32 @f(i32 [[X:%.*]]) +; CHECK: for.end: +; CHECK-NOT: mul nsw i32 [[X]], [[X]] +; CHECK: mul nsw i32 [[X]], [[X]] +; CHECK-NOT: mul nsw i32 [[X]], [[X]]