Index: llvm/trunk/lib/Transforms/Scalar/MergeICmps.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/MergeICmps.cpp +++ llvm/trunk/lib/Transforms/Scalar/MergeICmps.cpp @@ -41,9 +41,12 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Analysis/DomTreeUpdater.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/Pass.h" @@ -396,9 +399,10 @@ void dump() const; #endif // MERGEICMPS_DOT_ON - bool simplify(const TargetLibraryInfo *const TLI, AliasAnalysis *AA); + bool simplify(const TargetLibraryInfo *const TLI, AliasAnalysis *AA, + DomTreeUpdater &DTU); - private: +private: static bool IsContiguous(const BCECmpBlock &First, const BCECmpBlock &Second) { return First.Lhs().BaseId == Second.Lhs().BaseId && @@ -583,10 +587,11 @@ // Merges the given contiguous comparison blocks into one memcmp block. static BasicBlock *mergeComparisons(ArrayRef Comparisons, + BasicBlock *const InsertBefore, BasicBlock *const NextCmpBlock, PHINode &Phi, const TargetLibraryInfo *const TLI, - AliasAnalysis *AA) { + AliasAnalysis *AA, DomTreeUpdater &DTU) { assert(!Comparisons.empty() && "merging zero comparisons"); LLVMContext &Context = NextCmpBlock->getContext(); const BCECmpBlock &FirstCmp = Comparisons[0]; @@ -594,13 +599,15 @@ // Create a new cmp block before next cmp block. BasicBlock *const BB = BasicBlock::Create(Context, MergedBlockName(Comparisons).Name, - NextCmpBlock->getParent(), NextCmpBlock); + NextCmpBlock->getParent(), InsertBefore); IRBuilder<> Builder(BB); // Add the GEPs from the first BCECmpBlock. Value *const Lhs = Builder.Insert(FirstCmp.Lhs().GEP->clone()); Value *const Rhs = Builder.Insert(FirstCmp.Rhs().GEP->clone()); Value *IsEqual = nullptr; + LLVM_DEBUG(dbgs() << "Merging " << Comparisons.size() << " comparisons -> " + << BB->getName() << "\n"); if (Comparisons.size() == 1) { LLVM_DEBUG(dbgs() << "Only one comparison, updating branches\n"); Value *const LhsLoad = @@ -610,8 +617,6 @@ // There are no blocks to merge, just do the comparison. IsEqual = Builder.CreateICmpEQ(LhsLoad, RhsLoad); } else { - LLVM_DEBUG(dbgs() << "Merging " << Comparisons.size() << " comparisons\n"); - // If there is one block that requires splitting, we do it now, i.e. // just before we know we will collapse the chain. The instructions // can be executed before any of the instructions in the chain. @@ -641,18 +646,21 @@ // Add a branch to the next basic block in the chain. if (NextCmpBlock == PhiBB) { // Continue to phi, passing it the comparison result. - Builder.CreateBr(Phi.getParent()); + Builder.CreateBr(PhiBB); Phi.addIncoming(IsEqual, BB); + DTU.applyUpdates({{DominatorTree::Insert, BB, PhiBB}}); } else { // Continue to next block if equal, exit to phi else. Builder.CreateCondBr(IsEqual, NextCmpBlock, PhiBB); Phi.addIncoming(ConstantInt::getFalse(Context), BB); + DTU.applyUpdates({{DominatorTree::Insert, BB, NextCmpBlock}, + {DominatorTree::Insert, BB, PhiBB}}); } return BB; } bool BCECmpChain::simplify(const TargetLibraryInfo *const TLI, - AliasAnalysis *AA) { + AliasAnalysis *AA, DomTreeUpdater &DTU) { assert(Comparisons_.size() >= 2 && "simplifying trivial BCECmpChain"); // First pass to check if there is at least one merge. If not, we don't do // anything and we keep analysis passes intact. @@ -671,9 +679,11 @@ // Effectively merge blocks. We go in the reverse direction from the phi block // so that the next block is always available to branch to. - const auto mergeRange = [this, TLI, AA](int I, int Num, BasicBlock *Next) { - return mergeComparisons(makeArrayRef(Comparisons_).slice(I, Num), Next, - Phi_, TLI, AA); + const auto mergeRange = [this, TLI, AA, &DTU](int I, int Num, + BasicBlock *InsertBefore, + BasicBlock *Next) { + return mergeComparisons(makeArrayRef(Comparisons_).slice(I, Num), + InsertBefore, Next, Phi_, TLI, AA, DTU); }; int NumMerged = 1; BasicBlock *NextCmpBlock = Phi_.getParent(); @@ -684,11 +694,14 @@ << "\n"); ++NumMerged; } else { - NextCmpBlock = mergeRange(I + 1, NumMerged, NextCmpBlock); + NextCmpBlock = mergeRange(I + 1, NumMerged, NextCmpBlock, NextCmpBlock); NumMerged = 1; } } - NextCmpBlock = mergeRange(0, NumMerged, NextCmpBlock); + // Insert the entry block for the new chain before the old entry block. + // If the old entry block was the function entry, this ensures that the new + // entry can become the function entry. + NextCmpBlock = mergeRange(0, NumMerged, EntryBlock_, NextCmpBlock); // Replace the original cmp chain with the new cmp chain by pointing all // predecessors of EntryBlock_ to NextCmpBlock instead. This makes all cmp @@ -698,6 +711,20 @@ LLVM_DEBUG(dbgs() << "Updating jump into old chain from " << Pred->getName() << "\n"); Pred->getTerminator()->replaceUsesOfWith(EntryBlock_, NextCmpBlock); + DTU.applyUpdates({{DominatorTree::Delete, Pred, EntryBlock_}, + {DominatorTree::Insert, Pred, NextCmpBlock}}); + } + + // If the old cmp chain was the function entry, we need to update the function + // entry. + const bool ChainEntryIsFnEntry = + (EntryBlock_ == &EntryBlock_->getParent()->getEntryBlock()); + if (ChainEntryIsFnEntry && DTU.hasDomTree()) { + LLVM_DEBUG(dbgs() << "Changing function entry from " + << EntryBlock_->getName() << " to " + << NextCmpBlock->getName() << "\n"); + DTU.getDomTree().setNewRoot(NextCmpBlock); + DTU.applyUpdates({{DominatorTree::Delete, NextCmpBlock, EntryBlock_}}); } EntryBlock_ = nullptr; @@ -707,7 +734,7 @@ LLVM_DEBUG(dbgs() << "Deleting merged block " << Cmp.BB->getName() << "\n"); DeadBlocks.push_back(Cmp.BB); } - DeleteDeadBlocks(DeadBlocks); + DeleteDeadBlocks(DeadBlocks, &DTU); Comparisons_.clear(); return true; @@ -749,7 +776,7 @@ } bool processPhi(PHINode &Phi, const TargetLibraryInfo *const TLI, - AliasAnalysis *AA) { + AliasAnalysis *AA, DomTreeUpdater &DTU) { LLVM_DEBUG(dbgs() << "processPhi()\n"); if (Phi.getNumIncomingValues() <= 1) { LLVM_DEBUG(dbgs() << "skip: only one incoming value in phi\n"); @@ -814,7 +841,7 @@ return false; } - return CmpChain.simplify(TLI, AA); + return CmpChain.simplify(TLI, AA, DTU); } class MergeICmps : public FunctionPass { @@ -829,8 +856,14 @@ if (skipFunction(F)) return false; const auto &TLI = getAnalysis().getTLI(); const auto &TTI = getAnalysis().getTTI(F); + // MergeICmps does not need the DominatorTree, but we update it if it's + // already available. + auto *DTWP = getAnalysisIfAvailable(); + DomTreeUpdater DTU(DTWP ? &DTWP->getDomTree() : nullptr, + /*PostDominatorTree*/ nullptr, + DomTreeUpdater::UpdateStrategy::Eager); AliasAnalysis *AA = &getAnalysis().getAAResults(); - auto PA = runImpl(F, &TLI, &TTI, AA); + auto PA = runImpl(F, &TLI, &TTI, AA, DTU); return !PA.areAllPreserved(); } @@ -839,15 +872,18 @@ AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addPreserved(); + AU.addPreserved(); } PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI, - const TargetTransformInfo *TTI, AliasAnalysis *AA); + const TargetTransformInfo *TTI, AliasAnalysis *AA, + DomTreeUpdater &DTU); }; PreservedAnalyses MergeICmps::runImpl(Function &F, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, - AliasAnalysis *AA) { + AliasAnalysis *AA, DomTreeUpdater &DTU) { LLVM_DEBUG(dbgs() << "MergeICmpsPass: " << F.getName() << "\n"); // We only try merging comparisons if the target wants to expand memcmp later. @@ -863,11 +899,15 @@ for (auto BBIt = ++F.begin(); BBIt != F.end(); ++BBIt) { // A Phi operation is always first in a basic block. if (auto *const Phi = dyn_cast(&*BBIt->begin())) - MadeChange |= processPhi(*Phi, TLI, AA); + MadeChange |= processPhi(*Phi, TLI, AA, DTU); } - if (MadeChange) return PreservedAnalyses::none(); - return PreservedAnalyses::all(); + if (!MadeChange) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserve(); + PA.preserve(); + return PA; } } // namespace Index: llvm/trunk/test/Transforms/MergeICmps/X86/alias-merge-blocks.ll =================================================================== --- llvm/trunk/test/Transforms/MergeICmps/X86/alias-merge-blocks.ll +++ llvm/trunk/test/Transforms/MergeICmps/X86/alias-merge-blocks.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -mtriple=x86_64-unknown-unknown -mergeicmps -S | FileCheck %s --check-prefix=X86 +; RUN: opt < %s -mtriple=x86_64-unknown-unknown -mergeicmps -verify-dom-info -S | FileCheck %s --check-prefix=X86 %S = type { i32, i32, i32, i32 } Index: llvm/trunk/test/Transforms/MergeICmps/X86/atomic.ll =================================================================== --- llvm/trunk/test/Transforms/MergeICmps/X86/atomic.ll +++ llvm/trunk/test/Transforms/MergeICmps/X86/atomic.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s +; RUN: opt < %s -mergeicmps -verify-dom-info -mtriple=x86_64-unknown-unknown -S | FileCheck %s %S = type { i32, i32 } Index: llvm/trunk/test/Transforms/MergeICmps/X86/entry-block-shuffled.ll =================================================================== --- llvm/trunk/test/Transforms/MergeICmps/X86/entry-block-shuffled.ll +++ llvm/trunk/test/Transforms/MergeICmps/X86/entry-block-shuffled.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s +; RUN: opt < %s -mergeicmps -verify-dom-info -mtriple=x86_64-unknown-unknown -S | FileCheck %s %S = type { i32, i32, i32, i32 } Index: llvm/trunk/test/Transforms/MergeICmps/X86/gep-used-outside.ll =================================================================== --- llvm/trunk/test/Transforms/MergeICmps/X86/gep-used-outside.ll +++ llvm/trunk/test/Transforms/MergeICmps/X86/gep-used-outside.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s +; RUN: opt < %s -mergeicmps -verify-dom-info -mtriple=x86_64-unknown-unknown -S | FileCheck %s %S = type { i32, i32 } Index: llvm/trunk/test/Transforms/MergeICmps/X86/int64-and-ptr.ll =================================================================== --- llvm/trunk/test/Transforms/MergeICmps/X86/int64-and-ptr.ll +++ llvm/trunk/test/Transforms/MergeICmps/X86/int64-and-ptr.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -mtriple=x86_64-unknown-unknown -mergeicmps -S | FileCheck %s --check-prefix=X86 +; RUN: opt < %s -mtriple=x86_64-unknown-unknown -mergeicmps -verify-dom-info -S | FileCheck %s --check-prefix=X86 ; 8-byte int and 8-byte pointer should merge into a 16-byte memcmp. ; X86: memcmp(i8* {{.*}}, i8* {{.*}}, i64 16) Index: llvm/trunk/test/Transforms/MergeICmps/X86/last-block-produce-no-value.ll =================================================================== --- llvm/trunk/test/Transforms/MergeICmps/X86/last-block-produce-no-value.ll +++ llvm/trunk/test/Transforms/MergeICmps/X86/last-block-produce-no-value.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s +; RUN: opt < %s -mergeicmps -verify-dom-info -mtriple=x86_64-unknown-unknown -S | FileCheck %s %S = type { i32, i32, i32 } Index: llvm/trunk/test/Transforms/MergeICmps/X86/multiple-blocks-does-work.ll =================================================================== --- llvm/trunk/test/Transforms/MergeICmps/X86/multiple-blocks-does-work.ll +++ llvm/trunk/test/Transforms/MergeICmps/X86/multiple-blocks-does-work.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s --check-prefix=X86 +; RUN: opt < %s -mergeicmps -verify-dom-info -mtriple=x86_64-unknown-unknown -S | FileCheck %s --check-prefix=X86 %S = type { i32, i32, i32, i32 } Index: llvm/trunk/test/Transforms/MergeICmps/X86/pair-int32-int32.ll =================================================================== --- llvm/trunk/test/Transforms/MergeICmps/X86/pair-int32-int32.ll +++ llvm/trunk/test/Transforms/MergeICmps/X86/pair-int32-int32.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s --check-prefix=X86 -; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S -disable-simplify-libcalls | FileCheck %s --check-prefix=X86-NOBUILTIN +; RUN: opt < %s -mergeicmps -verify-dom-info -mtriple=x86_64-unknown-unknown -S | FileCheck %s --check-prefix=X86 +; RUN: opt < %s -mergeicmps -verify-dom-info -mtriple=x86_64-unknown-unknown -S -disable-simplify-libcalls | FileCheck %s --check-prefix=X86-NOBUILTIN %S = type { i32, i32 } Index: llvm/trunk/test/Transforms/MergeICmps/X86/pr36557.ll =================================================================== --- llvm/trunk/test/Transforms/MergeICmps/X86/pr36557.ll +++ llvm/trunk/test/Transforms/MergeICmps/X86/pr36557.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s --check-prefix=X86 +; RUN: opt < %s -mergeicmps -verify-dom-info -mtriple=x86_64-unknown-unknown -S | FileCheck %s --check-prefix=X86 source_filename = "qabstractitemmodeltester.cpp" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: llvm/trunk/test/Transforms/MergeICmps/X86/pr41917.ll =================================================================== --- llvm/trunk/test/Transforms/MergeICmps/X86/pr41917.ll +++ llvm/trunk/test/Transforms/MergeICmps/X86/pr41917.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -mergeicmps -S | FileCheck %s +; RUN: opt < %s -mergeicmps -verify-dom-info -S | FileCheck %s target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" target triple = "i386-pc-windows-msvc19.11.0" Index: llvm/trunk/test/Transforms/MergeICmps/X86/split-block-does-work.ll =================================================================== --- llvm/trunk/test/Transforms/MergeICmps/X86/split-block-does-work.ll +++ llvm/trunk/test/Transforms/MergeICmps/X86/split-block-does-work.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s --check-prefix=X86 +; RUN: opt < %s -mergeicmps -verify-dom-info -mtriple=x86_64-unknown-unknown -S | FileCheck %s --check-prefix=X86 %S = type { i32, i32, i32, i32 } Index: llvm/trunk/test/Transforms/MergeICmps/X86/tuple-four-int8.ll =================================================================== --- llvm/trunk/test/Transforms/MergeICmps/X86/tuple-four-int8.ll +++ llvm/trunk/test/Transforms/MergeICmps/X86/tuple-four-int8.ll @@ -1,6 +1,6 @@ ; XFAIL: * ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s +; RUN: opt < %s -mergeicmps -verify-dom-info -mtriple=x86_64-unknown-unknown -S | FileCheck %s ; This is a more involved test: clang generates this weird pattern for ; tuple. Right now we skip the entry block Index: llvm/trunk/test/Transforms/MergeICmps/X86/two-complex-bb.ll =================================================================== --- llvm/trunk/test/Transforms/MergeICmps/X86/two-complex-bb.ll +++ llvm/trunk/test/Transforms/MergeICmps/X86/two-complex-bb.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s --check-prefix=X86 +; RUN: opt < %s -mergeicmps -verify-dom-info -mtriple=x86_64-unknown-unknown -S | FileCheck %s --check-prefix=X86 %S = type { i32, i32 } Index: llvm/trunk/test/Transforms/MergeICmps/X86/volatile.ll =================================================================== --- llvm/trunk/test/Transforms/MergeICmps/X86/volatile.ll +++ llvm/trunk/test/Transforms/MergeICmps/X86/volatile.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s +; RUN: opt < %s -mergeicmps -verify-dom-info -mtriple=x86_64-unknown-unknown -S | FileCheck %s %S = type { i32, i32 }