Index: llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp =================================================================== --- llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -65,8 +65,30 @@ "sink-common-insts", cl::Hidden, cl::init(false), cl::desc("Sink common instructions (default = false)")); +/// Maximum number of candidate blocks with same hash to consider for merging. +static const unsigned MaxBlockMergeCandidates = 32; STATISTIC(NumSimpl, "Number of blocks simplified"); +STATISTIC(NumBlocksMerged, "Number of blocks merged (total)"); +STATISTIC(NumBlocksMergedWithPhis, "Number of blocks merged (with phis)"); +STATISTIC(NumBlocksMergedWithMultipleSuccessors, + "Number of blocks merged (with multiple successors)"); +STATISTIC(NumBlocksMergedWithBackedge, + "Number of blocks merged (with backedge)"); + +/// Check whether replacing BB with ReplacementBB would result in a CallBr +/// instruction with a duplicate destination in one of the predecessors. +// FIXME: See note in CodeGenPrepare.cpp. +bool wouldDuplicateCallBrDest(const BasicBlock &BB, + const BasicBlock &ReplacementBB) { + for (const BasicBlock *Pred : predecessors(&BB)) { + if (auto *CBI = dyn_cast(Pred->getTerminator())) + for (const BasicBlock *Succ : successors(CBI)) + if (&ReplacementBB == Succ) + return true; + } + return false; +} /// If we have more than one empty (other than phi node) return blocks, /// merge them together to promote recursive block merging. @@ -104,19 +126,7 @@ continue; } - // Skip merging if this would result in a CallBr instruction with a - // duplicate destination. FIXME: See note in CodeGenPrepare.cpp. - bool SkipCallBr = false; - for (pred_iterator PI = pred_begin(&BB), E = pred_end(&BB); - PI != E && !SkipCallBr; ++PI) { - if (auto *CBI = dyn_cast((*PI)->getTerminator())) - for (unsigned i = 0, e = CBI->getNumSuccessors(); i != e; ++i) - if (RetBlock == CBI->getSuccessor(i)) { - SkipCallBr = true; - break; - } - } - if (SkipCallBr) + if (wouldDuplicateCallBrDest(BB, *RetBlock)) continue; // Otherwise, we found a duplicate return block. Merge the two. @@ -158,6 +168,217 @@ return Changed; } +class HashAccumulator64 { + uint64_t Hash; + +public: + HashAccumulator64() { Hash = 0x6acaa36bef8325c5ULL; } + void add(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); } + uint64_t getHash() { return Hash; } +}; + +static uint64_t hashBlock(const BasicBlock &BB) { + HashAccumulator64 Acc; + for (const Instruction &I : BB) + Acc.add(I.getOpcode()); + for (const BasicBlock *Succ : successors(&BB)) + Acc.add((uintptr_t)Succ); + return Acc.getHash(); +} + +using BackedgesSet = + SmallDenseSet, 8>; + +/// Whether we are merging a non-backedge together with a backedge. +static bool isMergeWithBackedge(const BasicBlock &BB1, const BasicBlock &BB2, + const BackedgesSet &Backedges) { + for (auto Blocks : llvm::zip(successors(&BB1), successors(&BB2))) + if (Backedges.count({&BB1, std::get<0>(Blocks)}) != + Backedges.count({&BB2, std::get<1>(Blocks)})) + return true; + + return false; +} + +static bool canMergeBlocks(const BasicBlock &BB1, const BasicBlock &BB2, + bool IsMergeWithBackedge) { + // Quickly bail out if successors don't match. + if (!std::equal(succ_begin(&BB1), succ_end(&BB1), succ_begin(&BB2), + succ_end(&BB2))) + return false; + + // Map from instructions in one block to instructions in the other. + SmallDenseMap Map; + auto ValuesEqual = [&Map](const Value *V1, const Value *V2) { + if (V1 == V2) + return true; + + if (const auto *I1 = dyn_cast(V1)) + if (const auto *I2 = dyn_cast(V2)) + if (const Instruction *Mapped = Map.lookup(I1)) + if (Mapped == I2) + return true; + + return false; + }; + + auto InstructionsEqual = [&](const Instruction &I1, const Instruction &I2) { + if (!I1.isSameOperationAs(&I2)) + return false; + + if (const PHINode *PHI1 = dyn_cast(&I1)) { + // Do not merge blocks with phi nodes, if we're merging a non-backedge + // with a backedge. Consider the case where we have two blocks, where + // bb1 dominates bb2. + // + // bb1: %phi1(...) + // br bb2 + // + // bb2: %phi2(%phi1, %phi3) + // br bb3, bb4 + // + // bb3: %phi3(...) + // br bb2 + // + // bb4: use %phi1 + // + // We cannot (generally) merge bb1 and bb3, even if the phi nodes are + // otherwise compatible, because doing so would remove the possibility of + // referencing %phi1 in bb4. Because merged blocks must have the same + // successors, the only way this pattern can appear is if the edge from + // one of the blocks is a non-backedge (bb1->bb2), while the other is a + // backedge (bb3->bb2). + if (IsMergeWithBackedge) + return false; + + // For PHI nodes, values must match for predecessors that are present in + // both PHI nodes. We ignore other predecessors, because we will merge + // the PHI nodes together. We use simple value equalty (rather than the + // ValuesEqual predicate) here, because phi node operands refer to values + // from the incoming blocks, as such the remapping in this block is not + // meaningful. + const PHINode *PHI2 = cast(&I2); + for (unsigned I1 = 0; I1 < PHI1->getNumIncomingValues(); I1++) { + int I2 = PHI2->getBasicBlockIndex(PHI1->getIncomingBlock(I1)); + if (I2 >= 0 && PHI1->getIncomingValue(I1) != PHI2->getIncomingValue(I2)) + return false; + } + } else { + // For normal instructions, operands must match (modulo remapping). + if (!std::equal(I1.op_begin(), I1.op_end(), I2.op_begin(), I2.op_end(), + ValuesEqual)) + return false; + + if (const auto *Call = dyn_cast(&I1)) + if (Call->cannotMerge()) + return false; + } + + if (!I1.use_empty()) + Map.insert({&I1, &I2}); + return true; + }; + auto It1 = BB1.instructionsWithoutDebug(); + auto It2 = BB2.instructionsWithoutDebug(); + if (!std::equal(It1.begin(), It1.end(), It2.begin(), It2.end(), + InstructionsEqual)) + return false; + + // Make sure phi values in successor blocks match. + for (const BasicBlock *Succ : successors(&BB1)) { + for (const PHINode &Phi : Succ->phis()) { + const Value *Incoming1 = Phi.getIncomingValueForBlock(&BB1); + const Value *Incoming2 = Phi.getIncomingValueForBlock(&BB2); + if (!ValuesEqual(Incoming1, Incoming2)) + return false; + } + } + + if (wouldDuplicateCallBrDest(BB1, BB2)) + return false; + + return true; +} + +static bool tryMergeTwoBlocks(BasicBlock &BB1, BasicBlock &BB2, + bool IsMergeWithBackedge) { + if (!canMergeBlocks(BB1, BB2, IsMergeWithBackedge)) + return false; + + // We will keep BB1 and drop BB2. Merge metadata and attributes. + for (auto Insts : llvm::zip(BB1, BB2)) { + Instruction &I1 = std::get<0>(Insts); + const Instruction &I2 = std::get<1>(Insts); + + // Incoming values for PHI nodes are merged. + if (auto *PN1 = dyn_cast(&I1)) { + auto *PN2 = cast(&I2); + for (unsigned I = 0; I < PN2->getNumIncomingValues(); ++I) + PN1->addIncoming(PN2->getIncomingValue(I), PN2->getIncomingBlock(I)); + } + + I1.andIRFlags(&I2); + combineMetadataForCSE(&I1, &I2, true); + if (!isa(&I1)) + I1.applyMergedLocation(I1.getDebugLoc(), I2.getDebugLoc()); + } + + // Store predecessors, because they will be modified in this loop. + SmallVector Preds(predecessors(&BB2)); + for (BasicBlock *Pred : Preds) + Pred->getTerminator()->replaceSuccessorWith(&BB2, &BB1); + + for (BasicBlock *Succ : successors(&BB2)) + Succ->removePredecessor(&BB2); + + BB2.eraseFromParent(); + return true; +} + +static bool mergeIdenticalBlocks(Function &F) { + bool Changed = false; + SmallDenseMap> SameHashBlocks; + + // TODO: Keep backedges updated during transforms. + SmallVector, 8> + BackedgesVector; + BackedgesSet Backedges; + FindFunctionBackedges(F, BackedgesVector); + for (const auto &Backedge : BackedgesVector) + Backedges.insert(Backedge); + + for (BasicBlock &BB : make_early_inc_range(F)) { + // The entry block cannot be merged. + if (&BB == &F.getEntryBlock()) + continue; + + // Identify potential merging candidates based on a basic block hash. + bool Merged = false; + auto &Blocks = SameHashBlocks.try_emplace(hashBlock(BB)).first->second; + for (BasicBlock *Block : Blocks) { + bool IsMergeWithBackedge = isMergeWithBackedge(*Block, BB, Backedges); + if (tryMergeTwoBlocks(*Block, BB, IsMergeWithBackedge)) { + Merged = true; + ++NumBlocksMerged; + if (!llvm::empty(Block->phis())) + ++NumBlocksMergedWithPhis; + if (llvm::size(successors(Block)) > 1) + ++NumBlocksMergedWithMultipleSuccessors; + if (IsMergeWithBackedge) + ++NumBlocksMergedWithBackedge; + break; + } + } + + Changed |= Merged; + if (!Merged && Blocks.size() < MaxBlockMergeCandidates) + Blocks.push_back(&BB); + } + + // TODO: Merge iteratively. + return Changed; +} + /// Call SimplifyCFG on all the blocks in the function, /// iterating until no more changes are made. static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI, @@ -190,6 +411,7 @@ const SimplifyCFGOptions &Options) { bool EverChanged = removeUnreachableBlocks(F); EverChanged |= mergeEmptyReturnBlocks(F); + EverChanged |= mergeIdenticalBlocks(F); EverChanged |= iterativelySimplifyCFG(F, TTI, Options); // If neither pass changed anything, we're done. Index: llvm/lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -5832,83 +5832,6 @@ return Changed; } -/// Given an block with only a single landing pad and a unconditional branch -/// try to find another basic block which this one can be merged with. This -/// handles cases where we have multiple invokes with unique landing pads, but -/// a shared handler. -/// -/// We specifically choose to not worry about merging non-empty blocks -/// here. That is a PRE/scheduling problem and is best solved elsewhere. In -/// practice, the optimizer produces empty landing pad blocks quite frequently -/// when dealing with exception dense code. (see: instcombine, gvn, if-else -/// sinking in this file) -/// -/// This is primarily a code size optimization. We need to avoid performing -/// any transform which might inhibit optimization (such as our ability to -/// specialize a particular handler via tail commoning). We do this by not -/// merging any blocks which require us to introduce a phi. Since the same -/// values are flowing through both blocks, we don't lose any ability to -/// specialize. If anything, we make such specialization more likely. -/// -/// TODO - This transformation could remove entries from a phi in the target -/// block when the inputs in the phi are the same for the two blocks being -/// merged. In some cases, this could result in removal of the PHI entirely. -static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, - BasicBlock *BB) { - auto Succ = BB->getUniqueSuccessor(); - assert(Succ); - // If there's a phi in the successor block, we'd likely have to introduce - // a phi into the merged landing pad block. - if (isa(*Succ->begin())) - return false; - - for (BasicBlock *OtherPred : predecessors(Succ)) { - if (BB == OtherPred) - continue; - BasicBlock::iterator I = OtherPred->begin(); - LandingPadInst *LPad2 = dyn_cast(I); - if (!LPad2 || !LPad2->isIdenticalTo(LPad)) - continue; - for (++I; isa(I); ++I) - ; - BranchInst *BI2 = dyn_cast(I); - if (!BI2 || !BI2->isIdenticalTo(BI)) - continue; - - // We've found an identical block. Update our predecessors to take that - // path instead and make ourselves dead. - SmallPtrSet Preds; - Preds.insert(pred_begin(BB), pred_end(BB)); - for (BasicBlock *Pred : Preds) { - InvokeInst *II = cast(Pred->getTerminator()); - assert(II->getNormalDest() != BB && II->getUnwindDest() == BB && - "unexpected successor"); - II->setUnwindDest(OtherPred); - } - - // The debug info in OtherPred doesn't cover the merged control flow that - // used to go through BB. We need to delete it or update it. - for (auto I = OtherPred->begin(), E = OtherPred->end(); I != E;) { - Instruction &Inst = *I; - I++; - if (isa(Inst)) - Inst.eraseFromParent(); - } - - SmallPtrSet Succs; - Succs.insert(succ_begin(BB), succ_end(BB)); - for (BasicBlock *Succ : Succs) { - Succ->removePredecessor(BB); - } - - IRBuilder<> Builder(BI); - Builder.CreateUnreachable(); - BI->eraseFromParent(); - return true; - } - return false; -} - bool SimplifyCFGOpt::simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder) { return Branch->isUnconditional() ? simplifyUncondBranch(Branch, Builder) : simplifyCondBranch(Branch, Builder); @@ -5946,15 +5869,6 @@ return true; } - // See if we can merge an empty landing pad block with another which is - // equivalent. - if (LandingPadInst *LPad = dyn_cast(I)) { - for (++I; isa(I); ++I) - ; - if (I->isTerminator() && TryToMergeLandingPad(LPad, BI, BB)) - return true; - } - // If this basic block is ONLY a compare and a branch, and if a predecessor // branches to us and our successor, fold the comparison into the // predecessor and use logical operations to update the incoming value Index: llvm/test/Transforms/LoopDeletion/simplify-then-delete.ll =================================================================== --- llvm/test/Transforms/LoopDeletion/simplify-then-delete.ll +++ llvm/test/Transforms/LoopDeletion/simplify-then-delete.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -indvars -loop-deletion -simplifycfg | FileCheck %s +; RUN: opt < %s -S -indvars -loop-deletion -simplifycfg -instsimplify | FileCheck %s ; PR5794 ; Indvars and loop deletion should be able to eliminate all looping Index: llvm/test/Transforms/LoopUnswitch/infinite-loop.ll =================================================================== --- llvm/test/Transforms/LoopUnswitch/infinite-loop.ll +++ llvm/test/Transforms/LoopUnswitch/infinite-loop.ll @@ -17,10 +17,10 @@ ; CHECK-NEXT: br i1 %a, label %entry.split, label %abort0.split ; CHECK: entry.split: -; CHECK-NEXT: br i1 %b, label %for.body, label %abort1.split +; CHECK-NEXT: br i1 %b, label %entry.split.split, label %abort1.split -; CHECK: for.body: -; CHECK-NEXT: br label %for.body +; CHECK: entry.split.split: +; CHECK-NEXT: br label %entry.split.split ; CHECK: abort0.split: ; CHECK-NEXT: call void @end0() [[NOR_NUW:#[0-9]+]] Index: llvm/test/Transforms/PGOProfile/chr.ll =================================================================== --- llvm/test/Transforms/PGOProfile/chr.ll +++ llvm/test/Transforms/PGOProfile/chr.ll @@ -32,6 +32,8 @@ ; CHECK-NEXT: br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15 ; CHECK: bb0: ; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB2:%.*]] +; CHECK: bb2: ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: entry.split.nonchr: @@ -44,10 +46,7 @@ ; CHECK: bb1.nonchr: ; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP0]], 2 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0 -; CHECK-NEXT: br i1 [[TMP6]], label [[BB3]], label [[BB2_NONCHR:%.*]], !prof !16 -; CHECK: bb2.nonchr: -; CHECK-NEXT: call void @foo() -; CHECK-NEXT: br label [[BB3]] +; CHECK-NEXT: br i1 [[TMP6]], label [[BB3]], label [[BB2]], !prof !16 ; CHECK: bb3: ; CHECK-NEXT: ret void ; @@ -105,6 +104,8 @@ ; CHECK-NEXT: br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15 ; CHECK: bb0: ; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB4:%.*]] +; CHECK: bb4: ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: br label [[BB5:%.*]] ; CHECK: entry.split.nonchr: @@ -124,10 +125,7 @@ ; CHECK: bb3.nonchr: ; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP0]], 4 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -; CHECK-NEXT: br i1 [[TMP8]], label [[BB5]], label [[BB4_NONCHR:%.*]], !prof !16 -; CHECK: bb4.nonchr: -; CHECK-NEXT: call void @foo() -; CHECK-NEXT: br label [[BB5]] +; CHECK-NEXT: br i1 [[TMP8]], label [[BB5]], label [[BB4]], !prof !16 ; CHECK: bb5: ; CHECK-NEXT: ret void ; @@ -191,6 +189,8 @@ ; CHECK-NEXT: br i1 [[TMP2]], label [[BB1:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15 ; CHECK: bb1: ; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb3: ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: br label [[BB4:%.*]] ; CHECK: entry.split.nonchr: @@ -204,10 +204,7 @@ ; CHECK: bb2.nonchr: ; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP0]], 2 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -; CHECK-NEXT: br i1 [[TMP8]], label [[BB4]], label [[BB3_NONCHR:%.*]], !prof !16 -; CHECK: bb3.nonchr: -; CHECK-NEXT: call void @foo() -; CHECK-NEXT: br label [[BB4]] +; CHECK-NEXT: br i1 [[TMP8]], label [[BB4]], label [[BB3]], !prof !16 ; CHECK: bb1.nonchr: ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: br label [[BB2_NONCHR]] @@ -284,6 +281,8 @@ ; CHECK-NEXT: br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15 ; CHECK: bb0: ; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB2:%.*]] +; CHECK: bb2: ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: entry.split.nonchr: @@ -296,10 +295,7 @@ ; CHECK: bb1.nonchr: ; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP0]], 2 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0 -; CHECK-NEXT: br i1 [[TMP6]], label [[BB3]], label [[BB2_NONCHR:%.*]], !prof !16 -; CHECK: bb2.nonchr: -; CHECK-NEXT: call void @foo() -; CHECK-NEXT: br label [[BB3]] +; CHECK-NEXT: br i1 [[TMP6]], label [[BB3]], label [[BB2]], !prof !16 ; CHECK: bb3: ; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[I]], align 4 ; CHECK-NEXT: [[TMP8:%.*]] = and i32 [[TMP7]], 12 @@ -307,6 +303,8 @@ ; CHECK-NEXT: br i1 [[TMP9]], label [[BB4:%.*]], label [[BB3_SPLIT_NONCHR:%.*]], !prof !15 ; CHECK: bb4: ; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB6:%.*]] +; CHECK: bb6: ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: br label [[BB7:%.*]] ; CHECK: bb3.split.nonchr: @@ -319,10 +317,7 @@ ; CHECK: bb5.nonchr: ; CHECK-NEXT: [[TMP12:%.*]] = and i32 [[TMP7]], 8 ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 0 -; CHECK-NEXT: br i1 [[TMP13]], label [[BB7]], label [[BB6_NONCHR:%.*]], !prof !16 -; CHECK: bb6.nonchr: -; CHECK-NEXT: call void @foo() -; CHECK-NEXT: br label [[BB7]] +; CHECK-NEXT: br i1 [[TMP13]], label [[BB7]], label [[BB6]], !prof !16 ; CHECK: bb7: ; CHECK-NEXT: ret void ; @@ -803,6 +798,8 @@ ; CHECK-NEXT: br i1 [[TMP1]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15 ; CHECK: bb0: ; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB2:%.*]] +; CHECK: bb2: ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: entry.split.nonchr: @@ -815,10 +812,7 @@ ; CHECK: bb1.nonchr: ; CHECK-NEXT: [[V11_NONCHR:%.*]] = and i32 [[J0]], 8 ; CHECK-NEXT: [[V12_NONCHR:%.*]] = icmp eq i32 [[V11_NONCHR]], 0 -; CHECK-NEXT: br i1 [[V12_NONCHR]], label [[BB3]], label [[BB2_NONCHR:%.*]], !prof !16 -; CHECK: bb2.nonchr: -; CHECK-NEXT: call void @foo() -; CHECK-NEXT: br label [[BB3]] +; CHECK-NEXT: br i1 [[V12_NONCHR]], label [[BB3]], label [[BB2]], !prof !16 ; CHECK: bb3: ; CHECK-NEXT: [[V3:%.*]] = and i32 [[I0]], 2 ; CHECK-NEXT: [[V4:%.*]] = icmp eq i32 [[V3]], 0 @@ -943,6 +937,8 @@ ; CHECK-NEXT: br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15 ; CHECK: bb0: ; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB2:%.*]] +; CHECK: bb2: ; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[J:%.*]], align 4 ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: br label [[BB3:%.*]] @@ -956,14 +952,10 @@ ; CHECK: bb1.nonchr: ; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP0]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0 -; CHECK-NEXT: br i1 [[TMP7]], label [[BB3]], label [[BB2_NONCHR:%.*]], !prof !16 -; CHECK: bb2.nonchr: -; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[J]], align 4 -; CHECK-NEXT: call void @foo() -; CHECK-NEXT: br label [[BB3]] +; CHECK-NEXT: br i1 [[TMP7]], label [[BB3]], label [[BB2]], !prof !16 ; CHECK: bb3: -; CHECK-NEXT: [[TMP9:%.*]] = phi i32 [ [[TMP3]], [[BB0]] ], [ [[TMP0]], [[BB1_NONCHR]] ], [ [[TMP8]], [[BB2_NONCHR]] ] -; CHECK-NEXT: ret i32 [[TMP9]] +; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP3]], [[BB2]] ], [ [[TMP0]], [[BB1_NONCHR]] ] +; CHECK-NEXT: ret i32 [[TMP8]] ; entry: %0 = load i32, i32* %i @@ -1120,6 +1112,8 @@ ; CHECK-NEXT: br i1 [[TMP3]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15 ; CHECK: bb0: ; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB2:%.*]] +; CHECK: bb2: ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: entry.split.nonchr: @@ -1133,10 +1127,7 @@ ; CHECK-NEXT: [[MUL16_NONCHR:%.*]] = fmul double [[DIV_NONCHR]], [[CONV_NONCHR]] ; CHECK-NEXT: [[CONV717_NONCHR:%.*]] = fptosi double [[MUL16_NONCHR]] to i32 ; CHECK-NEXT: [[CMP18_NONCHR:%.*]] = icmp slt i32 [[CONV717_NONCHR]], 1 -; CHECK-NEXT: br i1 [[CMP18_NONCHR]], label [[BB3]], label [[BB2_NONCHR:%.*]], !prof !16 -; CHECK: bb2.nonchr: -; CHECK-NEXT: call void @foo() -; CHECK-NEXT: br label [[BB3]] +; CHECK-NEXT: br i1 [[CMP18_NONCHR]], label [[BB3]], label [[BB2]], !prof !16 ; CHECK: bb3: ; CHECK-NEXT: ret void ; @@ -1917,18 +1908,19 @@ ; CHECK-NEXT: ], !prof !20 ; CHECK: bb2: ; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB4:%.*]] +; CHECK: bb4: ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: br label [[BB7:%.*]] ; CHECK: bb2.nonchr1: ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: br label [[BB3_NONCHR2]] ; CHECK: bb3.nonchr2: -; CHECK-NEXT: br i1 [[CMP_I]], label [[BB4_NONCHR3:%.*]], label [[BB7]], !prof !18 -; CHECK: bb4.nonchr3: -; CHECK-NEXT: call void @foo() -; CHECK-NEXT: br label [[BB7]] +; CHECK-NEXT: br i1 [[CMP_I]], label [[BB4]], label [[BB7]], !prof !18 ; CHECK: bb7: ; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB9:%.*]] +; CHECK: bb9: ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: br label [[BB10:%.*]] ; CHECK: entry.split.nonchr: @@ -1943,10 +1935,7 @@ ; CHECK-NEXT: [[CMP3_NONCHR:%.*]] = icmp eq i64 [[J]], [[I]] ; CHECK-NEXT: br i1 [[CMP3_NONCHR]], label [[BB8_NONCHR:%.*]], label [[BB7_NONCHR:%.*]], !prof !16 ; CHECK: bb8.nonchr: -; CHECK-NEXT: br i1 [[CMP_I_NONCHR]], label [[BB10]], label [[BB9_NONCHR:%.*]], !prof !16 -; CHECK: bb9.nonchr: -; CHECK-NEXT: call void @foo() -; CHECK-NEXT: br label [[BB10]] +; CHECK-NEXT: br i1 [[CMP_I_NONCHR]], label [[BB10]], label [[BB9]], !prof !16 ; CHECK: bb7.nonchr: ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: br label [[BB8_NONCHR]] Index: llvm/test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll =================================================================== --- llvm/test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll +++ llvm/test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll @@ -75,16 +75,14 @@ ; NO_FWD-NEXT: switch i32 [[X:%.*]], label [[ELSE3:%.*]] [ ; NO_FWD-NEXT: i32 17, label [[RETURN:%.*]] ; NO_FWD-NEXT: i32 19, label [[IF19:%.*]] -; NO_FWD-NEXT: i32 42, label [[IF42:%.*]] +; NO_FWD-NEXT: i32 42, label [[IF19]] ; NO_FWD-NEXT: ] ; NO_FWD: if19: ; NO_FWD-NEXT: br label [[RETURN]] -; NO_FWD: if42: -; NO_FWD-NEXT: br label [[RETURN]] ; NO_FWD: else3: ; NO_FWD-NEXT: br label [[RETURN]] ; NO_FWD: return: -; NO_FWD-NEXT: [[R:%.*]] = phi i32 [ [[X]], [[IF19]] ], [ [[X]], [[IF42]] ], [ 0, [[ELSE3]] ], [ 17, [[ENTRY:%.*]] ] +; NO_FWD-NEXT: [[R:%.*]] = phi i32 [ [[X]], [[IF19]] ], [ 0, [[ELSE3]] ], [ 17, [[ENTRY:%.*]] ] ; NO_FWD-NEXT: ret i32 [[R]] ; ; FWD-LABEL: @PR34471( Index: llvm/test/Transforms/SimplifyCFG/HoistCode.ll =================================================================== --- llvm/test/Transforms/SimplifyCFG/HoistCode.ll +++ llvm/test/Transforms/SimplifyCFG/HoistCode.ll @@ -3,7 +3,8 @@ define void @foo(i1 %C, i32* %P) { ; CHECK-LABEL: @foo( -; CHECK-NEXT: store i32 7, i32* [[P:%.*]] +; CHECK-NEXT: T: +; CHECK-NEXT: store i32 7, i32* [[P:%.*]], align 4 ; CHECK-NEXT: ret void ; br i1 %C, label %T, label %F Index: llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll =================================================================== --- llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll +++ llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll @@ -1213,7 +1213,7 @@ ; CHECK-LABEL: @linearmap1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i32 [[C:%.*]], 10 -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[SWITCH_TABLEIDX]], 4 +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[SWITCH_TABLEIDX]], 3 ; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]] ; CHECK: switch.lookup: ; CHECK-NEXT: [[SWITCH_IDX_CAST:%.*]] = trunc i32 [[SWITCH_TABLEIDX]] to i8 @@ -1304,7 +1304,7 @@ ; CHECK-LABEL: @linearmap4( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i32 [[C:%.*]], -2 -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[SWITCH_TABLEIDX]], 4 +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[SWITCH_TABLEIDX]], 3 ; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]] ; CHECK: switch.lookup: ; CHECK-NEXT: [[SWITCH_IDX_CAST:%.*]] = trunc i32 [[SWITCH_TABLEIDX]] to i8 Index: llvm/test/Transforms/SimplifyCFG/duplicate-landingpad.ll =================================================================== --- llvm/test/Transforms/SimplifyCFG/duplicate-landingpad.ll +++ llvm/test/Transforms/SimplifyCFG/duplicate-landingpad.ll @@ -10,14 +10,14 @@ ; CHECK-LABEL: @test1( ; CHECK-NEXT: entry: ; CHECK-NEXT: invoke void @fn() -; CHECK-NEXT: to label [[INVOKE2:%.*]] unwind label [[LPAD2:%.*]] +; CHECK-NEXT: to label [[INVOKE2:%.*]] unwind label [[LPAD1:%.*]] ; CHECK: invoke2: ; CHECK-NEXT: invoke void @fn() -; CHECK-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD2]] +; CHECK-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD1]] ; CHECK: invoke.cont: ; CHECK-NEXT: ret void -; CHECK: lpad2: -; CHECK-NEXT: [[EXN2:%.*]] = landingpad { i8*, i32 } +; CHECK: lpad1: +; CHECK-NEXT: [[EXN:%.*]] = landingpad { i8*, i32 } ; CHECK-NEXT: cleanup ; CHECK-NEXT: call void @fn() ; CHECK-NEXT: ret void Index: llvm/test/Transforms/SimplifyCFG/merge-blocks.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/SimplifyCFG/merge-blocks.ll @@ -0,0 +1,506 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -simplifycfg < %s | FileCheck %s + +declare void @dummy() + +define i32 @basic(i32 %x, i32* %p) { +; CHECK-LABEL: @basic( +; CHECK-NEXT: [[SWITCH:%.*]] = icmp ult i32 [[X:%.*]], 2 +; CHECK-NEXT: br i1 [[SWITCH]], label [[BB1:%.*]], label [[BB3:%.*]] +; CHECK: bb1: +; CHECK-NEXT: store i32 0, i32* [[P:%.*]], align 4 +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: bb3: +; CHECK-NEXT: call void @dummy() +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, [[BB1]] ], [ 1, [[BB3]] ] +; CHECK-NEXT: ret i32 [[PHI]] +; + switch i32 %x, label %bb3 [ + i32 0, label %bb1 + i32 1, label %bb2 + ] + +bb1: + store i32 0, i32* %p + br label %exit + +bb2: + store i32 0, i32* %p + br label %exit + +bb3: + call void @dummy() + br label %exit + +exit: + %phi = phi i32 [ 0, %bb1 ], [ 0, %bb2 ], [ 1, %bb3] + ret i32 %phi +} + +; nonnull present in block blocks, keep it. +define i32 @metadata_nonnull_keep(i32 %x, i32** %p1, i32** %p2) { +; CHECK-LABEL: @metadata_nonnull_keep( +; CHECK-NEXT: [[SWITCH:%.*]] = icmp ult i32 [[X:%.*]], 2 +; CHECK-NEXT: br i1 [[SWITCH]], label [[BB1:%.*]], label [[BB3:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[V1:%.*]] = load i32*, i32** [[P1:%.*]], align 4, !nonnull !0 +; CHECK-NEXT: store i32* [[V1]], i32** [[P2:%.*]], align 8 +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: bb3: +; CHECK-NEXT: call void @dummy() +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, [[BB1]] ], [ 1, [[BB3]] ] +; CHECK-NEXT: ret i32 [[PHI]] +; + switch i32 %x, label %bb3 [ + i32 0, label %bb1 + i32 1, label %bb2 + ] + +bb1: + %v1 = load i32*, i32** %p1, align 4, !nonnull !{} + store i32* %v1, i32** %p2 + br label %exit + +bb2: + %v2 = load i32*, i32** %p1, align 4, !nonnull !{} + store i32* %v2, i32** %p2 + br label %exit + +bb3: + call void @dummy() + br label %exit + +exit: + %phi = phi i32 [ 0, %bb1 ], [ 0, %bb2 ], [ 1, %bb3] + ret i32 %phi +} + +; nonnull only present in one of the blocks, drop it. +define i32 @metadata_nonnull_drop(i32 %x, i32** %p1, i32** %p2) { +; CHECK-LABEL: @metadata_nonnull_drop( +; CHECK-NEXT: [[SWITCH:%.*]] = icmp ult i32 [[X:%.*]], 2 +; CHECK-NEXT: br i1 [[SWITCH]], label [[BB1:%.*]], label [[BB3:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[V1:%.*]] = load i32*, i32** [[P1:%.*]], align 4 +; CHECK-NEXT: store i32* [[V1]], i32** [[P2:%.*]], align 8 +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: bb3: +; CHECK-NEXT: call void @dummy() +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, [[BB1]] ], [ 1, [[BB3]] ] +; CHECK-NEXT: ret i32 [[PHI]] +; + switch i32 %x, label %bb3 [ + i32 0, label %bb1 + i32 1, label %bb2 + ] + +bb1: + %v1 = load i32*, i32** %p1, align 4, !nonnull !{} + store i32* %v1, i32** %p2 + br label %exit + +bb2: + %v2 = load i32*, i32** %p1, align 4 + store i32* %v2, i32** %p2 + br label %exit + +bb3: + call void @dummy() + br label %exit + +exit: + %phi = phi i32 [ 0, %bb1 ], [ 0, %bb2 ], [ 1, %bb3] + ret i32 %phi +} + +; The union of both range metadatas should be taken. +define i32 @metadata_range(i32 %x, i32* %p1, i32* %p2) { +; CHECK-LABEL: @metadata_range( +; CHECK-NEXT: [[SWITCH:%.*]] = icmp ult i32 [[X:%.*]], 2 +; CHECK-NEXT: br i1 [[SWITCH]], label [[BB1:%.*]], label [[BB3:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[V1:%.*]] = load i32, i32* [[P1:%.*]], align 4, !range !1 +; CHECK-NEXT: store i32 [[V1]], i32* [[P2:%.*]], align 4 +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: bb3: +; CHECK-NEXT: call void @dummy() +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, [[BB1]] ], [ 1, [[BB3]] ] +; CHECK-NEXT: ret i32 [[PHI]] +; + switch i32 %x, label %bb3 [ + i32 0, label %bb1 + i32 1, label %bb2 + ] + +bb1: + %v1 = load i32, i32* %p1, align 4, !range !{i32 0, i32 10} + store i32 %v1, i32* %p2 + br label %exit + +bb2: + %v2 = load i32, i32* %p1, align 4, !range !{i32 5, i32 15} + store i32 %v2, i32* %p2 + br label %exit + +bb3: + call void @dummy() + br label %exit + +exit: + %phi = phi i32 [ 0, %bb1 ], [ 0, %bb2 ], [ 1, %bb3] + ret i32 %phi +} + +; Only the common nuw flag may be preserved. +define i32 @attributes(i32 %x, i32 %y) { +; CHECK-LABEL: @attributes( +; CHECK-NEXT: [[SWITCH:%.*]] = icmp ult i32 [[X:%.*]], 2 +; CHECK-NEXT: br i1 [[SWITCH]], label [[BB1:%.*]], label [[BB3:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[A:%.*]] = add nuw i32 [[Y:%.*]], 1 +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: bb3: +; CHECK-NEXT: call void @dummy() +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[A]], [[BB1]] ], [ 1, [[BB3]] ] +; CHECK-NEXT: ret i32 [[PHI]] +; + switch i32 %x, label %bb3 [ + i32 0, label %bb1 + i32 1, label %bb2 + ] + +bb1: + %a = add nuw nsw i32 %y, 1 + br label %exit + +bb2: + %b = add nuw i32 %y, 1 + br label %exit + +bb3: + call void @dummy() + br label %exit + +exit: + %phi = phi i32 [ %a, %bb1 ], [ %b, %bb2 ], [ 1, %bb3] + ret i32 %phi +} + +; Don't try to merge with the entry block. +define void @entry_block() { +; CHECK-LABEL: @entry_block( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: br label [[LOOP]] +; +entry: + br label %loop + +loop: + br label %loop +} + +; Callbr may not have duplicate destinations. +define void @callbr() { +; CHECK-LABEL: @callbr( +; CHECK-NEXT: entry: +; CHECK-NEXT: callbr void asm sideeffect "", "X"(i8* blockaddress(@callbr, [[BB1:%.*]])) +; CHECK-NEXT: to label [[BB2:%.*]] [label %bb1] +; CHECK: bb1: +; CHECK-NEXT: ret void +; CHECK: bb2: +; CHECK-NEXT: ret void +; +entry: + callbr void asm sideeffect "", "X"(i8* blockaddress(@callbr, %bb1)) + to label %bb2 [ label %bb1 ] + +bb1: + ret void + +bb2: + ret void +} + +; This requires merging bb3,4,5,6 before bb1,2. +define i32 @two_level(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: @two_level( +; CHECK-NEXT: switch i32 [[Z:%.*]], label [[BB7:%.*]] [ +; CHECK-NEXT: i32 0, label [[BB1:%.*]] +; CHECK-NEXT: i32 1, label [[BB2:%.*]] +; CHECK-NEXT: ] +; CHECK: bb1: +; CHECK-NEXT: [[SWITCH:%.*]] = icmp ult i32 [[X:%.*]], 2 +; CHECK-NEXT: br i1 [[SWITCH]], label [[BB3:%.*]], label [[BB7]] +; CHECK: bb2: +; CHECK-NEXT: [[SWITCH1:%.*]] = icmp ult i32 [[X]], 2 +; CHECK-NEXT: br i1 [[SWITCH1]], label [[BB3]], label [[BB7]] +; CHECK: bb3: +; CHECK-NEXT: [[A:%.*]] = add i32 [[Y:%.*]], 1 +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: bb7: +; CHECK-NEXT: call void @dummy() +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[A]], [[BB3]] ], [ 0, [[BB7]] ] +; CHECK-NEXT: ret i32 [[PHI]] +; + switch i32 %z, label %bb7 [ + i32 0, label %bb1 + i32 1, label %bb2 + ] + +bb1: + switch i32 %x, label %bb7 [ + i32 0, label %bb3 + i32 1, label %bb4 + ] + +bb2: + switch i32 %x, label %bb7 [ + i32 0, label %bb5 + i32 1, label %bb6 + ] + +bb3: + %a = add i32 %y, 1 + br label %exit + +bb4: + %b = add i32 %y, 1 + br label %exit + +bb5: + %c = add i32 %y, 1 + br label %exit + +bb6: + %d = add i32 %y, 1 + br label %exit + +bb7: + call void @dummy() + br label %exit + +exit: + %phi = phi i32 [ %a, %bb3 ], [ %b, %bb4 ], [ %c, %bb5 ], [ %d, %bb6 ], [ 0, %bb7 ] + ret i32 %phi +} + +; The blocks can be merged if the phi nodes are. +define i32 @phi_merge(i32 %x, i32 %y, i32* %p) { +; CHECK-LABEL: @phi_merge( +; CHECK-NEXT: switch i32 [[Y:%.*]], label [[SW:%.*]] [ +; CHECK-NEXT: i32 0, label [[IND1:%.*]] +; CHECK-NEXT: i32 1, label [[IND2:%.*]] +; CHECK-NEXT: ] +; CHECK: sw: +; CHECK-NEXT: [[SWITCH:%.*]] = icmp ult i32 [[X:%.*]], 2 +; CHECK-NEXT: br i1 [[SWITCH]], label [[BB1:%.*]], label [[BB3:%.*]] +; CHECK: ind1: +; CHECK-NEXT: store i32 0, i32* [[P:%.*]], align 4 +; CHECK-NEXT: br label [[BB1]] +; CHECK: ind2: +; CHECK-NEXT: store i32 1, i32* [[P]], align 4 +; CHECK-NEXT: br label [[BB1]] +; CHECK: bb1: +; CHECK-NEXT: [[PHI1:%.*]] = phi i32 [ 1, [[IND1]] ], [ 0, [[SW]] ], [ 2, [[IND2]] ] +; CHECK-NEXT: store i32 [[PHI1]], i32* [[P]], align 4 +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: bb3: +; CHECK-NEXT: call void @dummy() +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, [[BB1]] ], [ 1, [[BB3]] ] +; CHECK-NEXT: ret i32 [[PHI]] +; + switch i32 %y, label %sw [ + i32 0, label %ind1 + i32 1, label %ind2 + ] + +sw: + switch i32 %x, label %bb3 [ + i32 0, label %bb1 + i32 1, label %bb2 + ] + +ind1: + store i32 0, i32* %p + br label %bb1 + +ind2: + store i32 1, i32* %p + br label %bb2 + +bb1: + %phi1 = phi i32 [ 0, %sw ], [ 1, %ind1 ] + store i32 %phi1, i32* %p + br label %exit + +bb2: + %phi2 = phi i32 [ 0, %sw ], [ 2, %ind2 ] + store i32 %phi2, i32* %p + br label %exit + +bb3: + call void @dummy() + br label %exit + +exit: + %phi = phi i32 [ 0, %bb1 ], [ 0, %bb2 ], [ 1, %bb3] + ret i32 %phi +} + +; The blocks cannot be merged, because the incoming values for %sw +; are different. +define i32 @phi_merge_different_incoming(i32 %x, i32 %y, i32* %p) { +; CHECK-LABEL: @phi_merge_different_incoming( +; CHECK-NEXT: switch i32 [[Y:%.*]], label [[SW:%.*]] [ +; CHECK-NEXT: i32 0, label [[IND1:%.*]] +; CHECK-NEXT: i32 1, label [[IND2:%.*]] +; CHECK-NEXT: ] +; CHECK: sw: +; CHECK-NEXT: switch i32 [[X:%.*]], label [[BB3:%.*]] [ +; CHECK-NEXT: i32 0, label [[BB1:%.*]] +; CHECK-NEXT: i32 1, label [[BB2:%.*]] +; CHECK-NEXT: ] +; CHECK: ind1: +; CHECK-NEXT: store i32 0, i32* [[P:%.*]], align 4 +; CHECK-NEXT: br label [[BB1]] +; CHECK: ind2: +; CHECK-NEXT: store i32 1, i32* [[P]], align 4 +; CHECK-NEXT: br label [[BB2]] +; CHECK: bb1: +; CHECK-NEXT: [[PHI1:%.*]] = phi i32 [ 0, [[SW]] ], [ 1, [[IND1]] ] +; CHECK-NEXT: store i32 [[PHI1]], i32* [[P]], align 4 +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: bb2: +; CHECK-NEXT: [[PHI2:%.*]] = phi i32 [ 3, [[SW]] ], [ 2, [[IND2]] ] +; CHECK-NEXT: store i32 [[PHI2]], i32* [[P]], align 4 +; CHECK-NEXT: br label [[EXIT]] +; CHECK: bb3: +; CHECK-NEXT: call void @dummy() +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, [[BB1]] ], [ 0, [[BB2]] ], [ 1, [[BB3]] ] +; CHECK-NEXT: ret i32 [[PHI]] +; + switch i32 %y, label %sw [ + i32 0, label %ind1 + i32 1, label %ind2 + ] + +sw: + switch i32 %x, label %bb3 [ + i32 0, label %bb1 + i32 1, label %bb2 + ] + +ind1: + store i32 0, i32* %p + br label %bb1 + +ind2: + store i32 1, i32* %p + br label %bb2 + +bb1: + %phi1 = phi i32 [ 0, %sw ], [ 1, %ind1 ] + store i32 %phi1, i32* %p + br label %exit + +bb2: + %phi2 = phi i32 [ 3, %sw ], [ 2, %ind2 ] + store i32 %phi2, i32* %p + br label %exit + +bb3: + call void @dummy() + br label %exit + +exit: + %phi = phi i32 [ 0, %bb1 ], [ 0, %bb2 ], [ 1, %bb3] + ret i32 %phi +} + +; Cannot merge loop.entry and loop.latch, because loop.entry dominates +; loop.latch dominates a use of the loop.entry phi. +define i32 @phi_merge_loop_header(i1 %c1, i1 %c2, i1 %c3, i32* %p) { +; CHECK-LABEL: @phi_merge_loop_header( +; CHECK-NEXT: br i1 [[C1:%.*]], label [[ENTRY_THEN1:%.*]], label [[ENTRY_THEN2:%.*]] +; CHECK: entry.then1: +; CHECK-NEXT: store i32 0, i32* [[P:%.*]], align 4 +; CHECK-NEXT: br label [[LOOP_ENTRY:%.*]] +; CHECK: entry.then2: +; CHECK-NEXT: call void @dummy() +; CHECK-NEXT: br label [[LOOP_ENTRY]] +; CHECK: loop.entry: +; CHECK-NEXT: [[PHI0:%.*]] = phi i32 [ 0, [[ENTRY_THEN1]] ], [ 1, [[ENTRY_THEN2]] ] +; CHECK-NEXT: br label [[LOOP_COND:%.*]] +; CHECK: loop.cond: +; CHECK-NEXT: [[PHI1:%.*]] = phi i32 [ [[PHI0]], [[LOOP_ENTRY]] ], [ [[PHI2:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: br i1 [[C2:%.*]], label [[LOOP_IF:%.*]], label [[LOOP_EXIT:%.*]] +; CHECK: loop.if: +; CHECK-NEXT: br i1 [[C3:%.*]], label [[LOOP_THEN1:%.*]], label [[LOOP_THEN2:%.*]] +; CHECK: loop.then1: +; CHECK-NEXT: store i32 0, i32* [[P]], align 4 +; CHECK-NEXT: br label [[LOOP_LATCH]] +; CHECK: loop.then2: +; CHECK-NEXT: call void @dummy() +; CHECK-NEXT: br label [[LOOP_LATCH]] +; CHECK: loop.latch: +; CHECK-NEXT: [[PHI2]] = phi i32 [ 2, [[LOOP_THEN1]] ], [ 3, [[LOOP_THEN2]] ] +; CHECK-NEXT: br label [[LOOP_COND]] +; CHECK: loop.exit: +; CHECK-NEXT: ret i32 [[PHI0]] +; + br i1 %c1, label %entry.then1, label %entry.then2 + +entry.then1: + store i32 0, i32* %p + br label %loop.entry + +entry.then2: + call void @dummy() + br label %loop.entry + +loop.entry: + %phi0 = phi i32 [ 0, %entry.then1 ], [ 1, %entry.then2 ] + br label %loop.cond + +loop.cond: + %phi1 = phi i32 [ %phi0, %loop.entry ], [ %phi2, %loop.latch ] + br i1 %c2, label %loop.if, label %loop.exit + +loop.if: + br i1 %c3, label %loop.then1, label %loop.then2 + +loop.then1: + store i32 0, i32* %p + br label %loop.latch + +loop.then2: + call void @dummy() + br label %loop.latch + +loop.latch: + %phi2 = phi i32 [ 2, %loop.then1 ], [ 3, %loop.then2 ] + br label %loop.cond + +loop.exit: + ret i32 %phi0 +} + +; CHECK: !1 = !{i32 0, i32 15} Index: llvm/test/Transforms/SimplifyCFG/wc-widen-block.ll =================================================================== --- llvm/test/Transforms/SimplifyCFG/wc-widen-block.ll +++ llvm/test/Transforms/SimplifyCFG/wc-widen-block.ll @@ -123,10 +123,7 @@ ; CHECK-NEXT: ret i32 [[DEOPTRET]] ; CHECK: guarded: ; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: br i1 [[COND_1:%.*]], label [[RETURN:%.*]], label [[DEOPT2:%.*]], !prof !0 -; CHECK: deopt2: -; CHECK-NEXT: [[DEOPTRET2:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] -; CHECK-NEXT: ret i32 [[DEOPTRET2]] +; CHECK-NEXT: br i1 [[COND_1:%.*]], label [[RETURN:%.*]], label [[DEOPT]], !prof !0 ; CHECK: return: ; CHECK-NEXT: ret i32 0 ; @@ -163,10 +160,7 @@ ; CHECK: guarded: ; CHECK-NEXT: [[V:%.*]] = call i32 @unknown_i32() ; CHECK-NEXT: [[COND_1:%.*]] = icmp eq i32 [[V]], 0 -; CHECK-NEXT: br i1 [[COND_1]], label [[RETURN:%.*]], label [[DEOPT2:%.*]], !prof !0 -; CHECK: deopt2: -; CHECK-NEXT: [[DEOPTRET2:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] -; CHECK-NEXT: ret i32 [[DEOPTRET2]] +; CHECK-NEXT: br i1 [[COND_1]], label [[RETURN:%.*]], label [[DEOPT]], !prof !0 ; CHECK: return: ; CHECK-NEXT: ret i32 0 ; @@ -270,19 +264,15 @@ ; CHECK-LABEL: @neg_loop( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[GUARDED:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() -; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0:%.*]], [[WIDENABLE_COND]] -; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 ; CHECK: deopt: ; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] ; CHECK-NEXT: ret i32 [[DEOPTRET]] ; CHECK: guarded: ; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: br i1 [[COND_1:%.*]], label [[LOOP:%.*]], label [[DEOPT2:%.*]], !prof !0 -; CHECK: deopt2: -; CHECK-NEXT: [[DEOPTRET2:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] -; CHECK-NEXT: ret i32 [[DEOPTRET2]] +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0:%.*]], [[WIDENABLE_COND]] +; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[COND_1:%.*]], [[EXIPLICIT_GUARD_COND]] +; CHECK-NEXT: br i1 [[OR_COND]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !2 ; entry: br label %guarded @@ -313,23 +303,16 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() ; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0:%.*]], [[WIDENABLE_COND]] -; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND2:%.*]] = and i1 [[COND_1:%.*]], [[WIDENABLE_COND]] +; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[EXIPLICIT_GUARD_COND]], [[EXIPLICIT_GUARD_COND2]] +; CHECK-NEXT: br i1 [[OR_COND]], label [[GUARDED2:%.*]], label [[DEOPT:%.*]], !prof !2 ; CHECK: deopt: ; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] ; CHECK-NEXT: ret i32 [[DEOPTRET]] -; CHECK: guarded: -; CHECK-NEXT: [[EXIPLICIT_GUARD_COND2:%.*]] = and i1 [[COND_1:%.*]], [[WIDENABLE_COND]] -; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND2]], label [[GUARDED2:%.*]], label [[DEOPT2:%.*]], !prof !0 -; CHECK: deopt2: -; CHECK-NEXT: [[DEOPTRET2:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] -; CHECK-NEXT: ret i32 [[DEOPTRET2]] ; CHECK: guarded2: ; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[P:%.*]], align 4 ; CHECK-NEXT: [[COND_2:%.*]] = icmp eq i32 [[V]], 0 -; CHECK-NEXT: br i1 [[COND_2]], label [[RETURN:%.*]], label [[DEOPT3:%.*]], !prof !0 -; CHECK: deopt3: -; CHECK-NEXT: [[DEOPTRET3:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] -; CHECK-NEXT: ret i32 [[DEOPTRET3]] +; CHECK-NEXT: br i1 [[COND_2]], label [[RETURN:%.*]], label [[DEOPT]], !prof !0 ; CHECK: return: ; CHECK-NEXT: ret i32 0 ;