Index: lib/CodeGen/CodeGenPrepare.cpp =================================================================== --- lib/CodeGen/CodeGenPrepare.cpp +++ lib/CodeGen/CodeGenPrepare.cpp @@ -212,6 +212,7 @@ const TargetTransformInfo *TTI = nullptr; const TargetLibraryInfo *TLInfo; const LoopInfo *LI; + DominatorTree *DT; std::unique_ptr BFI; std::unique_ptr BPI; @@ -262,6 +263,7 @@ void getAnalysisUsage(AnalysisUsage &AU) const override { // FIXME: When we can selectively preserve passes, preserve the domtree. + AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); @@ -343,6 +345,8 @@ TLInfo = &getAnalysis().getTLI(); TTI = &getAnalysis().getTTI(F); LI = &getAnalysis().getLoopInfo(); + DT = &getAnalysis().getDomTree(); + OptSize = F.optForSize(); if (ProfileGuidedSectionPrefix) { @@ -755,6 +759,11 @@ return true; SmallPtrSet SameIncomingValueBBs; + SmallVector PNs; + + for (auto DestBBI = DestBB->begin(); + auto *DestPN = dyn_cast(&*DestBBI); ++DestBBI) + PNs.push_back(DestPN); // Find all other incoming blocks from which incoming values of all PHIs in // DestBB are the same as the ones from BB. @@ -764,16 +773,10 @@ if (DestBBPred == BB) continue; - bool HasAllSameValue = true; - BasicBlock::const_iterator DestBBI = DestBB->begin(); - while (const PHINode *DestPN = dyn_cast(DestBBI++)) { - if (DestPN->getIncomingValueForBlock(BB) != - DestPN->getIncomingValueForBlock(DestBBPred)) { - HasAllSameValue = false; - break; - } - } - if (HasAllSameValue) + if (llvm::all_of(PNs, [&](PHINode *PN) { + return (PN->getIncomingValueForBlock(BB) == + PN->getIncomingValueForBlock(DestBBPred)); + })) SameIncomingValueBBs.insert(DestBBPred); } @@ -783,6 +786,14 @@ if (SameIncomingValueBBs.count(Pred)) return true; + // Check to see if none of the phis have constant incoming values for BB and + // Pred dominates DestBB, in such case extra COPYs are likely not added, so + // there is no reason to skip merging. + if (DT->dominates(Pred, DestBB) && llvm::none_of(PNs, [BB](PHINode *PN) { + return isa(PN->getIncomingValueForBlock(BB)); + })) + return true; + if (!BFI) { Function &F = *BB->getParent(); LoopInfo LI{DominatorTree(F)}; @@ -885,7 +896,7 @@ // Remember if SinglePred was the entry block of the function. If so, we // will need to move BB back to the entry position. bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); - MergeBasicBlockIntoOnlyPred(DestBB, nullptr); + MergeBasicBlockIntoOnlyPred(DestBB, DT); if (isEntry && BB != &BB->getParent()->getEntryBlock()) BB->moveBefore(&BB->getParent()->getEntryBlock()); @@ -926,7 +937,21 @@ // The PHIs are now updated, change everything that refers to BB to use // DestBB and remove BB. + SmallVector Updates; + for (auto *PredBB : predecessors(BB)) { + if (PredBB == BB) + continue; + DominatorTree::UpdateType UT = {DominatorTree::Delete, PredBB, BB}; + if (find(Updates, UT) == Updates.end()) { + Updates.push_back(UT); + if (PredBB != DestBB) + Updates.push_back({DominatorTree::Insert, PredBB, DestBB}); + } + } BB->replaceAllUsesWith(DestBB); + DT->applyUpdates(Updates); + BB->getTerminator()->eraseFromParent(); + DT->deleteEdge(BB, DestBB); BB->eraseFromParent(); ++NumBlocksElim; Index: test/Transforms/CodeGenPrepare/skip-merging-case-block.ll =================================================================== --- test/Transforms/CodeGenPrepare/skip-merging-case-block.ll +++ test/Transforms/CodeGenPrepare/skip-merging-case-block.ll @@ -143,11 +143,107 @@ !1 = !{!"branch_weights", i32 1 , i32 5, i32 1,i32 1, i32 1} !2 = !{!"branch_weights", i32 1 , i32 4, i32 1,i32 1, i32 1} +; while.cond does not dominate return, expect to skip merging empty block +; return.loopexit into return. +@b = external global i32, align 4 +@a = external global i32*, align 8 + +define void @f_switch4(i32 %i) local_unnamed_addr personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-LABEL: @f_switch4 +entry: + %0 = load i32, i32* @b, align 4 + %cond = icmp eq i32 %0, 6 + br i1 %cond, label %return, label %if.end + +if.end: ; preds = %entry + %add = add i32 %i, 2 + %1 = load i32*, i32** @a, align 8 + %magicptr = ptrtoint i32* %1 to i32 + br label %while.cond + +; CHECK-LABEL: while.cond: +; CHECK: i32 0, label %return.loopexit +; CHECK: i32 47, label %return.loopexit +while.cond: ; preds = %while.cond.backedge, %if.end + switch i32 %magicptr, label %while.cond.if.end10_crit_edge [ + i32 0, label %return.loopexit + i32 47, label %return.loopexit + ] + +while.cond.if.end10_crit_edge: ; preds = %while.cond + br label %while.cond.backedge + +while.cond.backedge: ; preds = %while.cond.if.end10_crit_edge, %if.then9 + br label %while.cond + +return.loopexit: ; preds = %while.cond + br label %return + +; CHECK_LABEL: return: +; CHECK: %{{.*}} = phi i32 [ 0, %entry ], [ %add, %return.loopexit ] +return: ; preds = %return.loopexit, %entry + %retval.4 = phi i32 [ 0, %entry ], [ %add, %return.loopexit ] + ret void +} +declare i32 @__gxx_personality_v0(...) + +; Expect to merge empty block while.cond2.loopexit into while.cond2 +define i32 @f_switch5(i32 %i) local_unnamed_addr { +; CHECK-LABEL: @f_switch5 +entry: + %0 = load i32, i32* @b, align 4 + %cond = icmp eq i32 %0, 6 + br i1 %cond, label %while.cond.preheader, label %sw.epilog + +while.cond.preheader: ; preds = %entry + %1 = load i32*, i32** @a, align 8 + %magicptr = ptrtoint i32* %1 to i64 + %arrayidx = getelementptr inbounds i32, i32* %1, i64 1 + br label %while.cond + +; CHECK-LABEL: while.cond: +; CHECK: i64 32, label %while.cond2 +; CHECK: i64 0, label %while.cond2 +while.cond: ; preds = %land.rhs, %while.cond.preheader + switch i64 %magicptr, label %land.rhs [ + i64 32, label %while.cond2.loopexit + i64 0, label %while.cond2.loopexit + ] + +land.rhs: ; preds = %while.cond + %2 = load i32, i32* %arrayidx, align 4 + %tobool1 = icmp eq i32 %2, 0 + br i1 %tobool1, label %while.cond2thread-pre-split.loopexit, label %while.cond + +while.cond2thread-pre-split.loopexit: ; preds = %land.rhs + br label %while.cond2thread-pre-split + +while.cond2thread-pre-split: ; preds = %while.body4, %while.cond2thread-pre-split.loopexit + %.pr = phi i32* [ %.pr.pre, %while.body4 ], [ %1, %while.cond2thread-pre-split.loopexit ] + br label %while.cond2 + +while.cond2.loopexit: ; preds = %while.cond, %while.cond + br label %while.cond2 + +; CHECK-LABEL: while.cond2: +; CHECK: %{{.*}} = phi i32* [ %.pr.pre, %while.body4 ], [ %1, %land.rhs ], [ %1, %while.cond ], [ %1, %while.cond ] +while.cond2: ; preds = %while.cond2.loopexit, %while.cond2thread-pre-split + %3 = phi i32* [ %.pr, %while.cond2thread-pre-split ], [ %1, %while.cond2.loopexit ] + %tobool3 = icmp eq i32* %3, null + br i1 %tobool3, label %sw.epilog, label %while.body4 + +while.body4: ; preds = %while.cond2 + tail call void bitcast (void (...)* @fn2 to void ()*)() + %.pr.pre = load i32*, i32** @a, align 8 + br label %while.cond2thread-pre-split + +sw.epilog: ; preds = %while.cond2, %entry + ret i32 undef +} + ; This test that BFI/BPI is created without any assertion in isMergingEmptyBlockProfitable() ; in the case where empty blocks are removed before creating BFI/BPI. -@b = common global i32 0, align 4 -@a = common global i32* null, align 8 define i32 @should_not_assert(i32 %i) local_unnamed_addr { entry: %0 = load i32, i32* @b, align 4