Index: lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- lib/Transforms/Utils/SimplifyCFG.cpp +++ lib/Transforms/Utils/SimplifyCFG.cpp @@ -98,6 +98,10 @@ SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block")); +static cl::opt DedupBB( + "simplifycfg-dedup-bb", cl::Hidden, cl::init(true), + cl::desc("Merge duplicated basic blocks into one")); + static cl::opt HoistCondStores( "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores if an unconditional store precedes")); @@ -132,6 +136,7 @@ NumLookupTablesHoles, "Number of switch instructions turned into lookup tables (holes checked)"); STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares"); +STATISTIC(NumDedupBBs, "Number of duplicated basic blocks merged"); STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block"); STATISTIC(NumSpeculations, "Number of speculative executed instructions"); @@ -1676,6 +1681,146 @@ } // end anonymous namespace +static unsigned getNumNonDbgInstrInBB(BasicBlock *BB) { + unsigned count = 0; + for (Instruction &Instr : *BB) + if (!isa(Instr)) count++; + return count; +} + +/// Given an unconditional branch that goes to BBSucc, +/// check whether BBSucc has two exactly same predecessors. +/// If it is true, merge these BBs into one BB. +static bool MergeDuplicatedBlock(BranchInst *BI, const SmallPtrSetImpl *LoopHeaders) { + assert(BI->isUnconditional()); + + // For this input CFG, we merge BB1 and BB2 if possible. + // This optimization is applied only in simple (but common) situations. + // e.g. we don't merge BBs if we need to add a new PHI node in BBsucc. + // Also, we don't optimize BBs around loops to avoid costly dominator analysis. + // + // input CFG: + // [BB0] [other BBs] + // \ | + // [BB1][BB2] [other BBs] + // \ | / + // [ BBsucc ] + // / | \ + // + // optimized CFG: + // [BB0] [other BBs] + // \ | + // ---[BB2] [other BBs] + // | / + // [ BBsucc ] + // / | \ + // + // For the edge(s) from (potentially multiple) BB0 to BB1, + // we support only conditional or unconditional branch instrcutions. + // For the edge from BB1 to BBsucc and BB2 to BBsucc, + // we support only unconditional branch instrcutions. + + BasicBlock *BB1 = BI->getParent(); + BasicBlock *BBSucc = BI->getSuccessor(0); + unsigned BB1NumInst = getNumNonDbgInstrInBB(BB1); + + // We do not optimize the entry block + if (BB1 == &BB1->getParent()->getEntryBlock()) return false; + + // An empty block will be eliminated anyway + if (BB1NumInst <= 1) return false; + + // We avoid optimizing a method with a loop to avoid excissive analysis cost + // for other than blocks just before the return block (a common case). + if (!(LoopHeaders && LoopHeaders->empty()) && + !isa(BBSucc->getTerminator())) return false; + + BasicBlock::iterator II = BBSucc->begin(); + const PHINode *PN = dyn_cast(II); + Value *InValBB1; + Instruction *InInstBB1; + if (PN) { + // We do not optimize if multiple PHI instructions exist + // in the successor for ease of analysis + if (++II != BBSucc->end() && isa(II)) return false; + + InValBB1 = PN->getIncomingValueForBlock(BB1); + InInstBB1 = dyn_cast(InValBB1); + } + + // We do not optimize non-branch CFG edge (e.g. switch) + for (auto *B : predecessors(BB1)) + if (!isa(B->getTerminator())) return false; + + for (auto *BB2 : predecessors(BBSucc)) { + if (BB2 == BB1) continue; + + // We do not optimize the entry block + if (BB2 == &BB2->getParent()->getEntryBlock()) continue; + + // We only merge CFG edges of unconditional branch + BranchInst *BI = dyn_cast(BB2->getTerminator()); + if (!(BI && BI->isUnconditional())) continue; + + // We do not optimize non-branch CFG edges + for (auto *B : predecessors(BB2)) + if (!isa(B->getTerminator())) continue; + + // We can merge control flow if incomming values to the PHI node + // at the succesor are same values or both defined in the BBs to merge. + // For the latter case, canSinkInstructions executes further analysis + if (PN) { + Value *InValBB2 = PN->getIncomingValueForBlock(BB2); + Instruction *InInstBB2 = dyn_cast(InValBB2); + if (!(InValBB1 == InValBB2) && + !(InInstBB1 && InInstBB1->getParent() == BB1 && + InInstBB2 && InInstBB2->getParent() == BB2)) continue; + } + + // If the sizes of BBs are different, we do not need to check in detail + if (BB1NumInst != getNumNonDbgInstrInBB(BB2)) continue; + + // We check the instructions in two BBs are same + SmallVector UnconditionalPreds({BB1, BB2}); + DenseMap> PHIOperands; + LockstepReverseIterator LRI(UnconditionalPreds); + while (LRI.isValid() && + canSinkInstructions(*LRI, PHIOperands)) { + // we do not optimize if additional PHI node is required + if (PHIOperands.size() > 0) break; + --LRI; + } + // if iterator is still valid, it means a mismatch found in middle of BB + if (LRI.isValid()) continue; + + BasicBlock *BBToErase = BB1; + BasicBlock *BBToRetain = BB2; + DEBUG(dbgs() << "DEDUP BB: merging duplicated blocks (" << BBToErase->getName() + << " into " << BBToRetain->getName() << ")\n"); + + // We replace the destination of incomming edges of BBToErase by BBToRetain + SmallVector BBToUpdate(predecessors(BBToErase)); + for (BasicBlock *BB0 : BBToUpdate) { + TerminatorInst *T = BB0->getTerminator(); + bool updated = false; + for (unsigned OI = 0, OE = T->getNumOperands(); OI != OE; ++OI) { + if (T->getOperand(OI) == BBToErase) { + T->setOperand(OI, BBToRetain); + updated = true; + } + } + assert(updated && "CFG and branch instruction are inconsistent"); + } + + // Now we can remove the BB, which has no incomming edge + DeleteDeadBlock(BBToErase); + NumDedupBBs++; + return true; + } + + return false; +} + /// Given an unconditional branch that goes to BBEnd, /// check whether BBEnd has only two predecessors and the other predecessor /// ends with an unconditional branch. If it is true, sink any common code @@ -5691,6 +5836,9 @@ IRBuilder<> &Builder) { BasicBlock *BB = BI->getParent(); + if (DedupBB && MergeDuplicatedBlock(BI, LoopHeaders)) + return true; + if (SinkCommon && SinkThenElseCodeToEnd(BI)) return true; Index: test/Transforms/SimplifyCFG/dedup-bb.ll =================================================================== --- /dev/null +++ test/Transforms/SimplifyCFG/dedup-bb.ll @@ -0,0 +1,115 @@ +; RUN: opt < %s -simplifycfg -S | FileCheck %s + +define i64 @test(i64 %a) { +; in this test, if.then1, if.then2 and if.then3 must be merged into one block +; CHECK-LABEL: test +; CHECK: call i64 @foo +; CHECK-NOT: call i64 @foo +entry: + %cmp1 = icmp eq i64 %a, 1 + br i1 %cmp1, label %if.then1, label %if.else1 + +if.else1: + store i64 1, i64* @g + %cmp2 = icmp eq i64 %a, 2 + br i1 %cmp2, label %if.then2, label %if.else2 + +if.else2: + store i64 2, i64* @g + %cmp3 = icmp eq i64 %a, 3 + br i1 %cmp3, label %if.then3, label %return + +return: + %retval = phi i64 [ %call1, %if.then1 ], [ %call2, %if.then2 ], [ %call3, %if.then3 ], [ 0, %if.else2 ] + ret i64 %retval + +if.then1: + %call1 = tail call i64 @foo(i64 %a) #2 + br label %return + +if.then2: + %call2 = tail call i64 @foo(i64 %a) #2 + br label %return + +if.then3: + %call3 = tail call i64 @foo(i64 %a) #2 + br label %return +} + +define i64 @test2(i64 %a) { +; in this test, if.then3 should not be merged due to difference in instruction +; CHECK-LABEL: test2 +; CHECK-DAG: call i64 @foo +; CHECK-DAG: call i64 @bar +; CHECK-NOT: call i64 @foo +entry: + %cmp1 = icmp eq i64 %a, 1 + br i1 %cmp1, label %if.then1, label %if.else1 + +if.else1: + store i64 1, i64* @g + %cmp2 = icmp eq i64 %a, 2 + br i1 %cmp2, label %if.then2, label %if.else2 + +if.else2: + store i64 2, i64* @g + %cmp3 = icmp eq i64 %a, 3 + br i1 %cmp3, label %if.then3, label %return + +return: + %retval = phi i64 [ %call1, %if.then1 ], [ %call2, %if.then2 ], [ %call3, %if.then3 ], [ 0, %if.else2 ] + ret i64 %retval + +if.then1: + %call1 = tail call i64 @foo(i64 %a) #2 + br label %return + +if.then2: + %call2 = tail call i64 @foo(i64 %a) #2 + br label %return + +if.then3: + %call3 = tail call i64 @bar(i64 %a) #2 + br label %return +} + +define i64 @test3(i64 %a) { +; in this test, if.then2 should not be merged due to difference in PHI node at the return block +; CHECK-LABEL: test3 +; CHECK: call i64 @foo +; CHECK: call i64 @foo +; CHECK-NOT: call i64 @foo +entry: + %cmp1 = icmp eq i64 %a, 1 + br i1 %cmp1, label %if.then1, label %if.else1 + +if.else1: + store i64 1, i64* @g + %cmp2 = icmp eq i64 %a, 2 + br i1 %cmp2, label %if.then2, label %if.else2 + +if.else2: + store i64 2, i64* @g + %cmp3 = icmp eq i64 %a, 3 + br i1 %cmp3, label %if.then3, label %return + +return: + %retval = phi i64 [ %call1, %if.then1 ], [ 0, %if.then2 ], [ %call3, %if.then3 ], [ 0, %if.else2 ] + ret i64 %retval + +if.then1: + %call1 = tail call i64 @foo(i64 %a) #2 + br label %return + +if.then2: + %call2 = tail call i64 @foo(i64 %a) #2 + br label %return + +if.then3: + %call3 = tail call i64 @foo(i64 %a) #2 + br label %return +} + +declare i64 @foo(i64) +declare i64 @bar(i64) +@g = global i64 0