Index: llvm/lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -23,6 +23,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
@@ -251,6 +252,15 @@
     "cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false),
     cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."));
 
+#ifdef EXPENSIVE_CHECKS
+static bool VerifyDT = true;
+#else
+static bool VerifyDT = false;
+#endif
+static cl::opt<bool, true> VerifyDTUpdates(
+    "cgp-verify-dt-updates", cl::location(VerifyDT), cl::Hidden,
+    cl::desc("Verify dominator tree updates in CodeGenPrepare"));
+
 static cl::opt<bool>
     VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false),
                      cl::desc("Enable BFI update verification for "
@@ -304,6 +314,7 @@
   const TargetTransformInfo *TTI = nullptr;
   const BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr;
   const TargetLibraryInfo *TLInfo = nullptr;
+  std::unique_ptr<DominatorTree> DT;
   LoopInfo *LI = nullptr;
   std::unique_ptr<BlockFrequencyInfo> BFI;
   std::unique_ptr<BranchProbabilityInfo> BPI;
@@ -355,9 +366,6 @@
   /// DataLayout for the Function being processed.
   const DataLayout *DL = nullptr;
 
-  /// Building the dominator tree can be expensive, so we only build it
-  /// lazily and update it when required.
-  std::unique_ptr<DominatorTree> DT;
 
 public:
   /// If encounter huge function, we need to limit the build time.
@@ -417,12 +425,6 @@
     }
   }
 
-  // Get the DominatorTree, building if necessary.
-  DominatorTree &getDT(Function &F) {
-    if (!DT)
-      DT = std::make_unique<DominatorTree>(F);
-    return *DT;
-  }
 
   void removeAllAssertingVHReferences(Value *V);
   bool eliminateAssumptions(Function &F);
@@ -584,9 +586,9 @@
   IsHugeFunc = F.size() > HugeFuncThresholdInCGPP;
 
   // Transformations above may invalidate dominator tree and/or loop info.
-  DT.reset();
+  DT = std::make_unique<DominatorTree>(F);
   LI->releaseMemory();
-  LI->analyze(getDT(F));
+  LI->analyze(*DT);
 
   bool MadeChange = true;
   bool FuncIterated = false;
@@ -600,9 +602,6 @@
       ModifyDT ModifiedDTOnIteration = ModifyDT::NotModifyDT;
       bool Changed = optimizeBlock(BB, ModifiedDTOnIteration);
 
-      if (ModifiedDTOnIteration == ModifyDT::ModifyBBDT)
-        DT.reset();
-
       MadeChange |= Changed;
       if (IsHugeFunc) {
         // If the BB is updated, it may still has chance to be optimized.
@@ -638,8 +637,14 @@
       eliminateFallThrough(F, DT.get());
 
 #ifndef NDEBUG
-    if (MadeChange && VerifyLoopInfo)
-      LI->verify(getDT(F));
+    if (MadeChange) {
+      if (VerifyDT)
+        assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
+               "Incorrect DominatorTree updates in CGP");
+
+      if (VerifyLoopInfo)
+        LI->verify(*DT);
+    }
 #endif
 
     // Really free removed instructions during promotion.
@@ -1504,14 +1509,13 @@
     // Finally, we need to ensure that the insert point will dominate all
     // existing uses of the increment.
 
-    auto &DT = getDT(*BO->getParent()->getParent());
-    if (DT.dominates(Cmp->getParent(), BO->getParent()))
+    if (DT->dominates(Cmp->getParent(), BO->getParent()))
       // If we're moving up the dom tree, all uses are trivially dominated.
       // (This is the common case for code produced by LSR.)
       return true;
 
     // Otherwise, special case the single use in the phi recurrence.
-    return BO->hasOneUse() && DT.dominates(Cmp->getParent(), L->getLoopLatch());
+    return BO->hasOneUse() && DT->dominates(Cmp->getParent(), L->getLoopLatch());
   };
   if (BO->getParent() != Cmp->getParent() && !IsReplacableIVIncrement(BO)) {
     // We used to use a dominator tree here to allow multi-block optimization.
@@ -2189,6 +2193,7 @@
 ///
 /// If the transform is performed, return true and set ModifiedDT to true.
 static bool despeculateCountZeros(IntrinsicInst *CountZeros,
+                                  DominatorTree &DT,
                                   LoopInfo &LI,
                                   const TargetLowering *TLI,
                                   const DataLayout *DL, ModifyDT &ModifiedDT,
@@ -2216,8 +2221,10 @@
     return false;
 
   // The intrinsic will be sunk behind a compare against zero and branch.
+  DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Lazy);
   BasicBlock *StartBlock = CountZeros->getParent();
-  BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false");
+  BasicBlock *CallBlock = SplitBlock(StartBlock, CountZeros, &DTU, &LI,
+                                     /* MSSAU */ nullptr, "cond.false");
   if (IsHugeFunc)
     FreshBBs.insert(CallBlock);
 
@@ -2225,17 +2232,11 @@
   // in this block to select the result of the intrinsic or the bit-width
   // constant if the input to the intrinsic is zero.
   BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(CountZeros));
-  BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end");
+  BasicBlock *EndBlock = SplitBlock(CallBlock, &*SplitPt, &DTU, &LI,
+                                    /* MSSAU */ nullptr, "cond.end");
   if (IsHugeFunc)
     FreshBBs.insert(EndBlock);
 
-  // Update the LoopInfo. The new blocks are in the same loop as the start
-  // block.
-  if (Loop *L = LI.getLoopFor(StartBlock)) {
-    L->addBasicBlockToLoop(CallBlock, LI);
-    L->addBasicBlockToLoop(EndBlock, LI);
-  }
-
   // Set up a builder to create a compare, conditional branch, and PHI.
   IRBuilder<> Builder(CountZeros->getContext());
   Builder.SetInsertPoint(StartBlock->getTerminator());
@@ -2250,6 +2251,7 @@
   Value *Cmp = Builder.CreateICmpEQ(Op, Zero, "cmpz");
   Builder.CreateCondBr(Cmp, EndBlock, CallBlock);
   StartBlock->getTerminator()->eraseFromParent();
+  DTU.applyUpdates({{DominatorTree::Insert, StartBlock, EndBlock}});
 
   // Create a PHI in the end block to select either the output of the intrinsic
   // or the bit width of the operand.
@@ -2410,8 +2412,8 @@
     case Intrinsic::cttz:
     case Intrinsic::ctlz:
       // If counting zeros is expensive, try to avoid it.
-      return despeculateCountZeros(II, *LI, TLI, DL, ModifiedDT, FreshBBs,
-                                   IsHugeFunc);
+      return despeculateCountZeros(II, *DT, *LI,TLI, DL, ModifiedDT,
+                                   FreshBBs, IsHugeFunc);
     case Intrinsic::fshl:
     case Intrinsic::fshr:
       return optimizeFunnelShift(II);
@@ -2570,6 +2572,7 @@
   }
 
   bool Changed = false;
+  DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Lazy);
   for (auto const &TailCallBB : TailCallBBs) {
     // Make sure the call instruction is followed by an unconditional branch to
     // the return block.
@@ -2578,7 +2581,7 @@
       continue;
 
     // Duplicate the return into TailCallBB.
-    (void)FoldReturnIntoUncondBranch(RetI, BB, TailCallBB);
+    (void)FoldReturnIntoUncondBranch(RetI, BB, TailCallBB, &DTU);
     assert(!VerifyBFIUpdates ||
            BFI->getBlockFreq(BB) >= BFI->getBlockFreq(TailCallBB));
     BFI->setBlockFreq(
@@ -2591,7 +2594,7 @@
 
   // If we eliminated all predecessors of the block, delete the block now.
   if (Changed && !BB->hasAddressTaken() && pred_empty(BB))
-    BB->eraseFromParent();
+    DTU.deleteBB(BB);
 
   return Changed;
 }
@@ -3285,7 +3288,7 @@
   const TargetRegisterInfo &TRI;
   const DataLayout &DL;
   const LoopInfo &LI;
-  const std::function<const DominatorTree &()> getDTFn;
+  const DominatorTree &DT;
 
   /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
   /// the memory instruction that we're computing this address for.
@@ -3322,14 +3325,14 @@
   AddressingModeMatcher(
       SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI,
       const TargetRegisterInfo &TRI, const LoopInfo &LI,
-      const std::function<const DominatorTree &()> getDTFn, Type *AT,
+      const DominatorTree &DT, Type *AT,
       unsigned AS, Instruction *MI, ExtAddrMode &AM,
       const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
       TypePromotionTransaction &TPT,
       std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
       bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
       : AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
-        DL(MI->getModule()->getDataLayout()), LI(LI), getDTFn(getDTFn),
+        DL(MI->getModule()->getDataLayout()), LI(LI), DT(DT),
         AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM),
         InsertedInsts(InsertedInsts), PromotedInsts(PromotedInsts), TPT(TPT),
         LargeOffsetGEP(LargeOffsetGEP), OptSize(OptSize), PSI(PSI), BFI(BFI) {
@@ -3348,14 +3351,14 @@
   Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst,
         SmallVectorImpl<Instruction *> &AddrModeInsts,
         const TargetLowering &TLI, const LoopInfo &LI,
-        const std::function<const DominatorTree &()> getDTFn,
+        const DominatorTree &DT,
         const TargetRegisterInfo &TRI, const SetOfInstrs &InsertedInsts,
         InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
         std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
         bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
     ExtAddrMode Result;
 
-    bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, LI, getDTFn,
+    bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, LI, DT,
                                          AccessTy, AS, MemoryInst, Result,
                                          InsertedInsts, PromotedInsts, TPT,
                                          LargeOffsetGEP, OptSize, PSI, BFI)
@@ -4136,7 +4139,7 @@
         // (Note that we defer the (expensive) domtree base legality check
         // to the very last possible point.)
         if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace) &&
-            getDTFn().dominates(IVInc, MemoryInst)) {
+            DT.dominates(IVInc, MemoryInst)) {
           AddrModeInsts.push_back(cast<Instruction>(IVInc));
           AddrMode = TestAddrMode;
           return true;
@@ -5237,7 +5240,7 @@
                                                                       0);
     TypePromotionTransaction::ConstRestorationPt LastKnownGood =
         TPT.getRestorationPoint();
-    AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI, LI, getDTFn,
+    AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI, LI, DT,
                                   AddressAccessTy, AS, UserI, Result,
                                   InsertedInsts, PromotedInsts, TPT,
                                   LargeOffsetGEP, OptSize, PSI, BFI);
@@ -5343,15 +5346,8 @@
     AddrModeInsts.clear();
     std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
                                                                       0);
-    // Defer the query (and possible computation of) the dom tree to point of
-    // actual use.  It's expected that most address matches don't actually need
-    // the domtree.
-    auto getDTFn = [MemoryInst, this]() -> const DominatorTree & {
-      Function *F = MemoryInst->getParent()->getParent();
-      return this->getDT(*F);
-    };
     ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
-        V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *LI, getDTFn,
+        V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *LI, *DT,
         *TRI, InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
         BFI.get());
 
@@ -6014,7 +6010,7 @@
         continue;
       bool inserted = false;
       for (auto &Pt : CurPts) {
-        if (getDT(F).dominates(Inst, Pt)) {
+        if (DT->dominates(Inst, Pt)) {
           replaceAllUsesWith(Pt, Inst, FreshBBs, IsHugeFunc);
           RemovedInsts.insert(Pt);
           Pt->removeFromParent();
@@ -6023,7 +6019,7 @@
           Changed = true;
           break;
         }
-        if (!getDT(F).dominates(Pt, Inst))
+        if (!DT->dominates(Pt, Inst))
           // Give up if we need to merge in a common dominator as the
           // experiments show it is not profitable.
           continue;
@@ -6966,12 +6962,6 @@
        llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get())))
     return false;
 
-  // The DominatorTree needs to be rebuilt by any consumers after this
-  // transformation. We simply reset here rather than setting the ModifiedDT
-  // flag to avoid restarting the function walk in runOnFunction for each
-  // select optimized.
-  DT.reset();
-
   // Transform a sequence like this:
   //    start:
   //       %cmp = cmp uge i32 %a, %b
@@ -6996,6 +6986,9 @@
   // block and its branch may be optimized away. In that case, one side of the
   // first branch will point directly to select.end, and the corresponding PHI
   // predecessor block will be the start block.
+  // The CFG is altered here and we update the DominatorTree and the LoopInfo,
+  // but we don't set a ModifiedDT flag to avoid restarting the function walk in
+  // runOnFunction for each select optimized.
 
   // Collect values that go on the true side and the values that go on the false
   // side.
@@ -7009,6 +7002,7 @@
 
   // Split the select block, according to how many (if any) values go on each
   // side.
+  DomTreeUpdater DTU(DT.get(), DomTreeUpdater::UpdateStrategy::Lazy);
   BasicBlock *StartBlock = SI->getParent();
   BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI));
 
@@ -7021,20 +7015,20 @@
   BranchInst *TrueBranch = nullptr;
   BranchInst *FalseBranch = nullptr;
   if (TrueInstrs.size() == 0) {
-    FalseBranch = cast<BranchInst>(SplitBlockAndInsertIfElse(
-        CondFr, &*SplitPt, false, nullptr, nullptr, LI));
+    FalseBranch = cast<BranchInst>(
+        SplitBlockAndInsertIfElse(CondFr, &*SplitPt, false, nullptr, &DTU, LI));
     FalseBlock = FalseBranch->getParent();
     EndBlock = cast<BasicBlock>(FalseBranch->getOperand(0));
   } else if (FalseInstrs.size() == 0) {
-    TrueBranch =
-        cast<BranchInst>(SplitBlockAndInsertIfThen(CondFr, &*SplitPt, false, nullptr, nullptr, LI));
+    TrueBranch = cast<BranchInst>(
+        SplitBlockAndInsertIfThen(CondFr, &*SplitPt, false, nullptr, &DTU, LI));
     TrueBlock = TrueBranch->getParent();
     EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
   } else {
     Instruction *ThenTerm = nullptr;
     Instruction *ElseTerm = nullptr;
     SplitBlockAndInsertIfThenElse(CondFr, &*SplitPt, &ThenTerm, &ElseTerm,
-                                  nullptr, nullptr, LI);
+                                  nullptr, &DTU, LI);
     TrueBranch = cast<BranchInst>(ThenTerm);
     FalseBranch = cast<BranchInst>(ElseTerm);
     TrueBlock = TrueBranch->getParent();
@@ -8311,13 +8305,9 @@
         // For huge function we tend to quickly go though the inner optmization
         // opportunities in the BB. So we go back to the BB head to re-optimize
         // each instruction instead of go back to the function head.
-        if (IsHugeFunc) {
-          DT.reset();
-          getDT(*BB.getParent());
+        if (IsHugeFunc)
           break;
-        } else {
-          return true;
-        }
+         return true;
       }
     }
   } while (ModifiedDT == ModifyDT::ModifyInstDT);