Index: lib/CodeGen/CodeGenPrepare.cpp =================================================================== --- lib/CodeGen/CodeGenPrepare.cpp +++ lib/CodeGen/CodeGenPrepare.cpp @@ -212,6 +212,7 @@ const TargetTransformInfo *TTI = nullptr; const TargetLibraryInfo *TLInfo; const LoopInfo *LI; + DominatorTree *DT; std::unique_ptr BFI; std::unique_ptr BPI; @@ -343,6 +344,10 @@ TLInfo = &getAnalysis().getTLI(); TTI = &getAnalysis().getTTI(F); LI = &getAnalysis().getLoopInfo(); + DominatorTreeWrapperPass *DTWP = + getAnalysisIfAvailable(); + DT = DTWP ? &DTWP->getDomTree() : nullptr; + OptSize = F.optForSize(); if (ProfileGuidedSectionPrefix) { @@ -705,6 +710,15 @@ !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB))) continue; + if (DT && ModifiedDT) { + DT->recalculate(F); + } else if (DT && !ModifiedDT) { + BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock(); + BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock(); + BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom); + DT->changeImmediateDominator(DestBB, NewIDom); + DT->eraseNode(BB); + } eliminateMostlyEmptyBlock(BB); MadeChange = true; } @@ -714,6 +728,7 @@ bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB, bool isPreheader) { + Function &F = *BB->getParent(); // Do not delete loop preheaders if doing so would create a critical edge. // Loop preheaders can be good locations to spill registers. If the // preheader is deleted and we create a critical edge, registers may be @@ -755,6 +770,11 @@ return true; SmallPtrSet SameIncomingValueBBs; + SmallVector PNs; + + for (auto DestBBI = DestBB->begin(); + auto *DestPN = dyn_cast(&*DestBBI); ++DestBBI) + PNs.push_back(DestPN); // Find all other incoming blocks from which incoming values of all PHIs in // DestBB are the same as the ones from BB. @@ -764,16 +784,10 @@ if (DestBBPred == BB) continue; - bool HasAllSameValue = true; - BasicBlock::const_iterator DestBBI = DestBB->begin(); - while (const PHINode *DestPN = dyn_cast(DestBBI++)) { - if (DestPN->getIncomingValueForBlock(BB) != - DestPN->getIncomingValueForBlock(DestBBPred)) { - HasAllSameValue = false; - break; - } - } - if (HasAllSameValue) + if (llvm::none_of(PNs, [&](PHINode *PN) { + return (PN->getIncomingValueForBlock(BB) != + PN->getIncomingValueForBlock(DestBBPred)); + })) SameIncomingValueBBs.insert(DestBBPred); } @@ -783,9 +797,18 @@ if (SameIncomingValueBBs.count(Pred)) return true; + // Check to see if none of the phis have constant incoming values for BB and + // Pred dominates DestBB, in such case extra COPYs are likely not added, so + // there is no reason to skip merging. + if (DT && DT->dominates(Pred, DestBB) && + llvm::none_of(PNs, [BB](PHINode *PN) { + return isa(PN->getIncomingValueForBlock(BB)); + })) + return true; + if (!BFI) { - Function &F = *BB->getParent(); - LoopInfo LI{DominatorTree(F)}; + DominatorTree DT(F); + LoopInfo LI{DT}; BPI.reset(new BranchProbabilityInfo(F, LI)); BFI.reset(new BlockFrequencyInfo(F, *BPI, LI)); } @@ -890,6 +913,7 @@ if (isEntry && BB != &BB->getParent()->getEntryBlock()) BB->moveBefore(&BB->getParent()->getEntryBlock()); + ModifiedDT = true; DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); return; } Index: test/Transforms/CodeGenPrepare/skip-merging-case-block.ll =================================================================== --- test/Transforms/CodeGenPrepare/skip-merging-case-block.ll +++ test/Transforms/CodeGenPrepare/skip-merging-case-block.ll @@ -143,11 +143,107 @@ !1 = !{!"branch_weights", i32 1 , i32 5, i32 1,i32 1, i32 1} !2 = !{!"branch_weights", i32 1 , i32 4, i32 1,i32 1, i32 1} +; while.cond does not dominate return, expect to skip merging empty block +; return.loopexit into return. +@b = external global i32, align 4 +@a = external global i32*, align 8 + +define void @f_switch4(i32 %i) local_unnamed_addr personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-LABEL: @f_switch4 +entry: + %0 = load i32, i32* @b, align 4 + %cond = icmp eq i32 %0, 6 + br i1 %cond, label %return, label %if.end + +if.end: ; preds = %entry + %add = add i32 %i, 2 + %1 = load i32*, i32** @a, align 8 + %magicptr = ptrtoint i32* %1 to i32 + br label %while.cond + +; CHECK-LABEL: while.cond: +; CHECK: i32 0, label %return.loopexit +; CHECK: i32 47, label %return.loopexit +while.cond: ; preds = %while.cond.backedge, %if.end + switch i32 %magicptr, label %while.cond.if.end10_crit_edge [ + i32 0, label %return.loopexit + i32 47, label %return.loopexit + ] + +while.cond.if.end10_crit_edge: ; preds = %while.cond + br label %while.cond.backedge + +while.cond.backedge: ; preds = %while.cond.if.end10_crit_edge, %if.then9 + br label %while.cond + +return.loopexit: ; preds = %while.cond + br label %return + +; CHECK_LABEL: return: +; CHECK: %{{.*}} = phi i32 [ 0, %entry ], [ %add, %return.loopexit ] +return: ; preds = %return.loopexit, %entry + %retval.4 = phi i32 [ 0, %entry ], [ %add, %return.loopexit ] + ret void +} +declare i32 @__gxx_personality_v0(...) + +; Expect to merge empty block while.cond2.loopexit into while.cond2 +define i32 @f_switch5(i32 %i) local_unnamed_addr { +; CHECK-LABEL: @f_switch5 +entry: + %0 = load i32, i32* @b, align 4 + %cond = icmp eq i32 %0, 6 + br i1 %cond, label %while.cond.preheader, label %sw.epilog + +while.cond.preheader: ; preds = %entry + %1 = load i32*, i32** @a, align 8 + %magicptr = ptrtoint i32* %1 to i64 + %arrayidx = getelementptr inbounds i32, i32* %1, i64 1 + br label %while.cond + +; CHECK-LABEL: while.cond: +; CHECK: i64 32, label %while.cond2 +; CHECK: i64 0, label %while.cond2 +while.cond: ; preds = %land.rhs, %while.cond.preheader + switch i64 %magicptr, label %land.rhs [ + i64 32, label %while.cond2.loopexit + i64 0, label %while.cond2.loopexit + ] + +land.rhs: ; preds = %while.cond + %2 = load i32, i32* %arrayidx, align 4 + %tobool1 = icmp eq i32 %2, 0 + br i1 %tobool1, label %while.cond2thread-pre-split.loopexit, label %while.cond + +while.cond2thread-pre-split.loopexit: ; preds = %land.rhs + br label %while.cond2thread-pre-split + +while.cond2thread-pre-split: ; preds = %while.body4, %while.cond2thread-pre-split.loopexit + %.pr = phi i32* [ %.pr.pre, %while.body4 ], [ %1, %while.cond2thread-pre-split.loopexit ] + br label %while.cond2 + +while.cond2.loopexit: ; preds = %while.cond, %while.cond + br label %while.cond2 + +; CHECK-LABEL: while.cond2: +; CHECK: %{{.*}} = phi i32* [ %.pr.pre, %while.body4 ], [ %1, %land.rhs ], [ %1, %while.cond ], [ %1, %while.cond ] +while.cond2: ; preds = %while.cond2.loopexit, %while.cond2thread-pre-split + %3 = phi i32* [ %.pr, %while.cond2thread-pre-split ], [ %1, %while.cond2.loopexit ] + %tobool3 = icmp eq i32* %3, null + br i1 %tobool3, label %sw.epilog, label %while.body4 + +while.body4: ; preds = %while.cond2 + tail call void bitcast (void (...)* @fn2 to void ()*)() + %.pr.pre = load i32*, i32** @a, align 8 + br label %while.cond2thread-pre-split + +sw.epilog: ; preds = %while.cond2, %entry + ret i32 undef +} + ; This test that BFI/BPI is created without any assertion in isMergingEmptyBlockProfitable() ; in the case where empty blocks are removed before creating BFI/BPI. -@b = common global i32 0, align 4 -@a = common global i32* null, align 8 define i32 @should_not_assert(i32 %i) local_unnamed_addr { entry: %0 = load i32, i32* @b, align 4