diff --git a/llvm/include/llvm/Analysis/MergeSets.h b/llvm/include/llvm/Analysis/MergeSets.h
new file mode 100644
--- /dev/null
+++ b/llvm/include/llvm/Analysis/MergeSets.h
@@ -0,0 +1,89 @@
+//===- MergeSets.h - Calculate MergeSets ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// Build the merge sets for a given dominator tree. The merge set of a node N
+/// is the set of nodes that may need a PHI node for a definition in N. Based
+/// on the algorithm from
+///
+/// Dibyendu Das and U. Ramakrishna. 2005. A practical and fast iterative
+/// algorithm for φ-function computation using DJ graphs.
+/// ACM Trans. Program. Lang. Syst. 27, 3 (May 2005), 426-440.
+///
+/// It has been modified to not explicitly use the DJ graph data structure.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_MERGESETS_H
+#define LLVM_ANALYSIS_MERGESETS_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFGDiff.h"
+#include "llvm/IR/Dominators.h"
+
+#include <unordered_set>
+#include <utility>
+
+namespace llvm {
+
+class MergeSets {
+  DominatorTree &DT;
+  DenseMap<BasicBlock *, unsigned> &BBNumbers;
+  size_t NumBBs;
+
+  struct PairHasher {
+    std::size_t operator()(const std::pair<unsigned, unsigned> &Key) const {
+      return Key.first ^
+             (Key.second + (Key.first << 6) + (Key.first >> 2) + 0x9e3779b9);
+    }
+  };
+
+  /// J edges (as pairs of block numbers) already processed in this round.
+  std::unordered_set<std::pair<unsigned, unsigned>, PairHasher> Visited;
+
+  /// Merge set for each block, indexed by block number.
+  SmallVector<BitVector, 0> Sets;
+
+  /// Reverse mapping from block number to block.
+  DenseMap<unsigned, BasicBlock *> NumToBB;
+
+  BitVector &getSet(DomTreeNode *N) { return Sets[getNum(N)]; }
+
+  BitVector &getSet(BasicBlock *BB) { return Sets[getNum(BB)]; }
+
+  BitVector &getSet(unsigned i) { return Sets[i]; }
+
+  const BitVector &getSet(unsigned i) const { return Sets[i]; }
+
+  unsigned getNum(BasicBlock *BB) const { return BBNumbers.find(BB)->second; }
+  unsigned getNum(DomTreeNode *N) const { return getNum(N->getBlock()); }
+
+  bool isInconsistentJ(DomTreeNode *Src, DomTreeNode *Dst) const;
+
+  void dumpSet(DomTreeNode *N);
+
+public:
+  MergeSets(DominatorTree &DT, DenseMap<BasicBlock *, unsigned> &BBNumbers)
+      : DT(DT), BBNumbers(BBNumbers), NumBBs(BBNumbers.size()),
+        Sets(NumBBs, BitVector(NumBBs)) {
+    // Reverse mapping from number to BB.
+    NumToBB.reserve(BBNumbers.size());
+    for (auto &P : BBNumbers)
+      NumToBB[P.second] = P.first;
+  }
+
+  /// Returns the merge set for the nodes in \p DefBBs.
+  SmallVector<BasicBlock *, 32>
+  get(const SmallPtrSetImpl<BasicBlock *> &DefBBs);
+
+  /// Compute merge sets for the CFG rooted at \p Root using TDMSC-I from the
+  /// paper.
+  void build(BasicBlock *Root);
+
+  /// Compute merge sets for the CFG rooted at \p Root using TDMSC-II from the
+  /// paper.
+  void build2(BasicBlock *Root);
+};
+
+} // namespace llvm
+
+#endif // LLVM_ANALYSIS_MERGESETS_H
diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt
--- a/llvm/lib/Analysis/CMakeLists.txt
+++ b/llvm/lib/Analysis/CMakeLists.txt
@@ -65,6 +65,7 @@
   MemorySSAUpdater.cpp
+  MergeSets.cpp
   ModuleDebugInfoPrinter.cpp
   ModuleSummaryAnalysis.cpp
   MustExecute.cpp
   ObjCARCAliasAnalysis.cpp
   ObjCARCAnalysisUtils.cpp
diff --git a/llvm/lib/Analysis/MergeSets.cpp b/llvm/lib/Analysis/MergeSets.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Analysis/MergeSets.cpp
@@ -0,0 +1,208 @@
+//===- MergeSets.cpp - Compute MergeSets ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Compute the MergeSets for a dominator tree.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/MergeSets.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/PredIteratorCache.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <queue>
+
+namespace llvm {
+
+#define DEBUG_TYPE "mergesets"
+
+STATISTIC(Num1Round, "Number of times merge sets were valid after one round");
+STATISTIC(Num2Rounds, "Number of times merge sets were valid after two rounds");
+STATISTIC(Num3Rounds,
+          "Number of times merge sets were valid after three or more rounds");
+
+// Returns true if the J edge Src->Dst has already been visited in this round
+// but the merge set of Src does not yet contain the merge set of Dst.
+bool MergeSets::isInconsistentJ(DomTreeNode *Src, DomTreeNode *Dst) const {
+  if (!Src || DT.properlyDominates(Src, Dst))
+    return false;
+
+  unsigned SrcNum = getNum(Src);
+  unsigned DstNum = getNum(Dst);
+  if (Visited.find({SrcNum, DstNum}) == Visited.end())
+    return false;
+
+  BitVector Both = getSet(SrcNum);
+  unsigned SrcCount = Both.count();
+  Both |= getSet(DstNum);
+  if (Both.count() == SrcCount)
+    return false;
+
+  return true;
+}
+
+void MergeSets::build(BasicBlock *Root) {
+  DT.updateDFSNumbers();
+  PredIteratorCache PredCache;
+
+  bool Done = false;
+  unsigned Round = 0;
+  while (!Done) {
+    Round++;
+    Done = true;
+    Visited.clear();
+
+    // Visit the dominator tree in breadth-first order.
+    std::queue<DomTreeNode *> WorkQueue;
+    WorkQueue.push(DT.getNode(Root));
+
+    while (!WorkQueue.empty()) {
+      DomTreeNode *N = WorkQueue.front();
+      WorkQueue.pop();
+      for (auto *C : N->getChildren())
+        WorkQueue.push(C);
+
+      for (BasicBlock *P : PredCache.get(N->getBlock())) {
+        auto *PDom = DT.getNode(P);
+        // Skip unreachable predecessors; only J edges (edges whose source does
+        // not properly dominate N) are interesting.
+        if (!PDom || DT.properlyDominates(PDom, N))
+          continue;
+
+        unsigned PNum = getNum(P);
+        unsigned NNum = getNum(N);
+        if (!Visited.insert({PNum, NNum}).second)
+          continue;
+
+        // Walk up the dominator tree from the J edge's source, adding N's
+        // merge set plus N itself to every node at a level >= N's level.
+        DomTreeNode *Tmp = PDom;
+        DomTreeNode *LNode = nullptr;
+        const auto &S2 = getSet(N);
+        while (Tmp->getLevel() >= N->getLevel()) {
+          auto &TmpS = getSet(Tmp);
+          TmpS |= S2;
+          TmpS.set(NNum);
+          LNode = Tmp;
+          Tmp = Tmp->getIDom();
+        }
+
+        // If any incoming edge is inconsistent, we need to do another round
+        // of the algorithm.
+        if (any_of(PredCache.get(LNode->getBlock()),
+                   [&LNode, this](BasicBlock *Src) {
+                     return this->isInconsistentJ(this->DT.getNode(Src),
+                                                  LNode);
+                   })) {
+          Done = false;
+          break;
+        }
+      }
+    }
+  }
+
+  if (Round == 1)
+    Num1Round++;
+  else if (Round == 2)
+    Num2Rounds++;
+  else
+    Num3Rounds++;
+}
+
+void MergeSets::build2(BasicBlock *Root) {
+  DT.updateDFSNumbers();
+
+  PredIteratorCache PredCache;
+
+  bool Done = false;
+  unsigned Round = 0;
+  while (!Done) {
+    Round++;
+    Done = true;
+    Visited.clear();
+
+    // Visit the dominator tree in breadth-first order.
+    std::queue<DomTreeNode *> WorkQueue;
+    WorkQueue.push(DT.getNode(Root));
+
+    while (!WorkQueue.empty()) {
+      DomTreeNode *N = WorkQueue.front();
+      WorkQueue.pop();
+      for (auto *C : N->getChildren())
+        WorkQueue.push(C);
+
+      for (BasicBlock *P : PredCache.get(N->getBlock())) {
+        auto *PDom = DT.getNode(P);
+        // Skip unreachable predecessors; only J edges are interesting.
+        if (!PDom || DT.properlyDominates(PDom, N))
+          continue;
+
+        unsigned PNum = getNum(P);
+        unsigned NNum = getNum(N);
+        if (!Visited.insert({PNum, NNum}).second)
+          continue;
+
+        // Walk up the dominator tree from the J edge's source, adding N's
+        // merge set plus N itself to every node at a level >= N's level.
+        DomTreeNode *Tmp = PDom;
+        DomTreeNode *LNode = nullptr;
+        const auto &S2 = getSet(N);
+        while (Tmp->getLevel() >= N->getLevel()) {
+          auto &TmpS = getSet(Tmp);
+          TmpS |= S2;
+          TmpS.set(NNum);
+          LNode = Tmp;
+          Tmp = Tmp->getIDom();
+        }
+
+        // Unlike TDMSC-I, repair inconsistent incoming edges of LNode right
+        // away by propagating LNode's merge set up from each such edge's
+        // source.
+        for (BasicBlock *Src : PredCache.get(LNode->getBlock())) {
+          auto *SrcD = DT.getNode(Src);
+          if (!isInconsistentJ(SrcD, LNode))
+            continue;
+
+          DomTreeNode *NN = SrcD;
+          DomTreeNode *LNode1 = nullptr;
+          auto &LNodeSet = getSet(LNode);
+          while (NN->getLevel() >= LNode->getLevel()) {
+            auto &TmpS = getSet(NN);
+            TmpS |= LNodeSet;
+            LNode1 = NN;
+            NN = NN->getIDom();
+          }
+
+          // If any incoming edge is still inconsistent, we need to do another
+          // round of the algorithm.
+          if (any_of(PredCache.get(LNode1->getBlock()),
+                     [&LNode1, this](BasicBlock *Pred) {
+                       return this->isInconsistentJ(this->DT.getNode(Pred),
+                                                    LNode1);
+                     })) {
+            Done = false;
+            break;
+          }
+        }
+      }
+    }
+  }
+
+  if (Round == 1)
+    Num1Round++;
+  else if (Round == 2)
+    Num2Rounds++;
+  else
+    Num3Rounds++;
+}
+
+SmallVector<BasicBlock *, 32>
+MergeSets::get(const SmallPtrSetImpl<BasicBlock *> &DefBBs) {
+  SmallVector<BasicBlock *, 32> Result;
+  BitVector M(BBNumbers.size());
+  for (auto *BB : DefBBs) {
+    assert(DT.isReachableFromEntry(BB) && "Node not reachable.");
+    M |= getSet(BB);
+  }
+
+  for (unsigned I : M.set_bits())
+    Result.push_back(NumToBB[I]);
+  return Result;
+}
+
+void MergeSets::dumpSet(DomTreeNode *N) {
+  SmallPtrSet<BasicBlock *, 1> S;
+  S.insert(N->getBlock());
+  dbgs() << " MS for " << N->getBlock()->getName() << " : ";
+  for (auto *BB : get(S))
+    dbgs() << BB->getName() << " ";
+  dbgs() << "\n";
+}
+
+} // namespace llvm
diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
--- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -25,7 +25,7 @@
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/IteratedDominanceFrontier.h"
-#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Analysis/MergeSets.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CFG.h"
@@ -44,7 +44,9 @@
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/User.h"
+#include "llvm/IR/Verifier.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
 #include <algorithm>
 #include <cassert>
@@ -530,13 +532,14 @@
 void PromoteMem2Reg::run() {
   Function &F = *DT.getRoot()->getParent();
-
+  /* if (F.getName() == "gen_pic_list_from_frame_list")*/
"gen_pic_list_from_frame_list")*/ + // F.getParent()->dump(); AllocaDbgDeclares.resize(Allocas.size()); AllocaInfo Info; LargeBlockInfo LBI; - ForwardIDFCalculator IDF(DT); + SmallVector, 32> NeedPHIs; for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) { AllocaInst *AI = Allocas[AllocaNum]; @@ -600,35 +603,72 @@ // nodes and see if we can optimize out some work by avoiding insertion of // dead phi nodes. - // Unique the set of defining blocks for efficient lookup. - SmallPtrSet DefBlocks(Info.DefiningBlocks.begin(), - Info.DefiningBlocks.end()); - - // Determine which blocks the value is live in. These are blocks which lead - // to uses. - SmallPtrSet LiveInBlocks; - ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks); - - // At this point, we're committed to promoting the alloca using IDF's, and - // the standard SSA construction algorithm. Determine which blocks need phi - // nodes and see if we can optimize out some work by avoiding insertion of - // dead phi nodes. - IDF.setLiveInBlocks(LiveInBlocks); - IDF.setDefiningBlocks(DefBlocks); - SmallVector PHIBlocks; - IDF.calculate(PHIBlocks); - llvm::sort(PHIBlocks, [this](BasicBlock *A, BasicBlock *B) { - return BBNumbers.find(A)->second < BBNumbers.find(B)->second; - }); - - unsigned CurrentVersion = 0; - for (BasicBlock *BB : PHIBlocks) - QueuePhiNode(BB, AllocaNum, CurrentVersion); + NeedPHIs.emplace_back(AllocaNum, std::move(Info)); } if (Allocas.empty()) return; // All of the allocas must have been trivial! + if (!NeedPHIs.empty()) { + MergeSets MS(DT, BBNumbers); + MS.build2(&F.getEntryBlock()); + + for (auto &P : NeedPHIs) { + auto &Info = P.second; + unsigned AllocaNum = P.first; + // Unique the set of defining blocks for efficient lookup. + SmallPtrSet DefBlocks; + for (auto *BB : Info.DefiningBlocks) + if (DT.isReachableFromEntry(BB)) + DefBlocks.insert(BB); + + auto AI = Allocas[AllocaNum]; + + // Determine which blocks the value is live in. These are blocks which + // lead to uses. + SmallPtrSet LiveInBlocks; + ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks); + + // At this point, we're committed to promoting the alloca using IDF's, and + // the standard SSA construction algorithm. Determine which blocks need + // phi nodes and see if we can optimize out some work by avoiding + // insertion of dead phi nodes. + auto PHIBlocks = MS.get(DefBlocks); + if (PHIBlocks.size() > 1) + llvm::sort(PHIBlocks, [this](BasicBlock *A, BasicBlock *B) { + return BBNumbers.lookup(A) < BBNumbers.lookup(B); + }); + + unsigned CurrentVersion = 0; + unsigned NumAdded = 0; + for (BasicBlock *BB : PHIBlocks) { + if (!LiveInBlocks.count(BB)) + continue; + NumAdded++; + QueuePhiNode(BB, AllocaNum, CurrentVersion); + } +#ifndef NDEBUG + // Verify that we added the same number of PHI nodes using merge sets as + // with the iterated dominance frontier. + ForwardIDFCalculator IDF(DT); + IDF.setLiveInBlocks(LiveInBlocks); + IDF.setDefiningBlocks(DefBlocks); + SmallVector IDFPHIBlocks; + IDF.calculate(IDFPHIBlocks); + + // Make sure both approaches result in the same number of PHIs placed. + assert(NumAdded == IDFPHIBlocks.size() && "Number of blocks mismatched"); + + SmallPtrSet PlacedBlockSet; + PlacedBlockSet.insert(PHIBlocks.begin(), PHIBlocks.end()); + + // Check all PHI blocks computed by IDF are also found using merge sets. 
+      for (auto *BB : IDFPHIBlocks)
+        assert(PlacedBlockSet.count(BB) && "missed a block");
+#endif
+    }
+  }
+
   LBI.clear();
 
   // Set the incoming values for the basic block to be null values for all of
@@ -763,6 +803,10 @@
   }
 
   NewPhiNodes.clear();
+  /* if (verifyFunction(F, &dbgs())) {*/
+  // F.dump();
+  // assert(false);
+  /*}*/
 }
 
 /// Determine which blocks the value is live in.
diff --git a/llvm/test/Transforms/Mem2Reg/merge-sets.ll b/llvm/test/Transforms/Mem2Reg/merge-sets.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/Mem2Reg/merge-sets.ll
@@ -0,0 +1,86 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -mem2reg | FileCheck %s
+
+define void @test1() {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[IF_END:%.*]]
+; CHECK:       if.end:
+; CHECK-NEXT:    br i1 undef, label [[WHILE_COND:%.*]], label [[IF_END46:%.*]]
+; CHECK:       while.cond:
+; CHECK-NEXT:    [[TOP_IDX_0:%.*]] = phi i32 [ undef, [[IF_END]] ], [ [[INC16:%.*]], [[WHILE_BODY:%.*]] ]
+; CHECK-NEXT:    br label [[LOR_RHS:%.*]]
+; CHECK:       lor.rhs:
+; CHECK-NEXT:    br i1 undef, label [[WHILE_BODY]], label [[IF_END46]]
+; CHECK:       while.body:
+; CHECK-NEXT:    [[INC16]] = add nsw i32 undef, 1
+; CHECK-NEXT:    br label [[WHILE_COND]]
+; CHECK:       if.end46:
+; CHECK-NEXT:    [[TOP_IDX_1:%.*]] = phi i32 [ [[TOP_IDX_0]], [[LOR_RHS]] ], [ undef, [[IF_END]] ]
+; CHECK-NEXT:    br label [[WHILE_COND49:%.*]]
+; CHECK:       while.cond49:
+; CHECK-NEXT:    [[IDXPROM97:%.*]] = sext i32 [[TOP_IDX_1]] to i64
+; CHECK-NEXT:    br label [[WHILE_COND49]]
+;
+entry:
+  %top_idx = alloca i32, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %entry
+  br i1 undef, label %while.cond, label %if.end46
+
+while.cond:                                       ; preds = %while.body, %if.end
+  br label %lor.rhs
+
+lor.rhs:                                          ; preds = %while.cond
+  br i1 undef, label %while.body, label %if.end46
+
+while.body:                                       ; preds = %lor.rhs
+  %inc16 = add nsw i32 undef, 1
+  store i32 %inc16, i32* %top_idx, align 4
+  br label %while.cond
+
+if.end46:                                         ; preds = %lor.rhs, %if.end
+  br label %while.cond49
+
+while.cond49:                                     ; preds = %while.cond49, %if.end46
+  %top_idx.0.load139 = load i32, i32* %top_idx, align 4
+  %idxprom97 = sext i32 %top_idx.0.load139 to i64
+  br label %while.cond49
+}
+
+define void @test2() {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[SW_EPILOG:%.*]]
+; CHECK:       sw.epilog:
+; CHECK-NEXT:    br i1 undef, label [[WHILE_COND85:%.*]], label [[IF_END105:%.*]]
+; CHECK:       while.cond85:
+; CHECK-NEXT:    [[__M_0:%.*]] = phi double* [ undef, [[SW_EPILOG]] ], [ [[V1:%.*]], [[IF_END98:%.*]] ]
+; CHECK-NEXT:    br i1 undef, label [[IF_END105]], label [[IF_END98]]
+; CHECK:       if.end98:
+; CHECK-NEXT:    [[V1]] = load double*, double** undef, align 8
+; CHECK-NEXT:    br label [[WHILE_COND85]]
+; CHECK:       if.end105:
+; CHECK-NEXT:    [[__M_1:%.*]] = phi double* [ [[__M_0]], [[WHILE_COND85]] ], [ undef, [[SW_EPILOG]] ]
+; CHECK-NEXT:    unreachable
+;
+entry:
+  %__m = alloca double*, align 8
+  br label %sw.epilog
+
+sw.epilog:                                        ; preds = %entry
+  br i1 undef, label %while.cond85, label %if.end105
+
+while.cond85:                                     ; preds = %if.end98, %sw.epilog
+  br i1 undef, label %if.end105, label %if.end98
+
+if.end98:                                         ; preds = %while.cond85
+  %v1 = load double*, double** undef, align 8
+  store double* %v1, double** %__m, align 8
+  br label %while.cond85
+
+if.end105:                                        ; preds = %while.cond85, %sw.epilog
+  %tmp1 = load double*, double** %__m, align 8
+  unreachable
+}
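
Usage note (not part of the patch): the sketch below shows how the new analysis is
meant to be driven, mirroring what PromoteMem2Reg::run() now does. Only MergeSets,
build2() and get() come from the code above; the wrapper function name
placePHIsWithMergeSets, the F/DT parameters, and the block-numbering loop are
illustrative assumptions.

  // Sketch only: assumes an up-to-date DominatorTree for F.
  #include "llvm/Analysis/MergeSets.h"
  #include "llvm/IR/Dominators.h"
  #include "llvm/IR/Function.h"

  static void placePHIsWithMergeSets(llvm::Function &F,
                                     llvm::DominatorTree &DT) {
    using namespace llvm;

    // Number every block; MergeSets sizes its bit vectors from this map, so
    // the numbering must cover all blocks (mem2reg reuses its BBNumbers map).
    DenseMap<BasicBlock *, unsigned> BBNumbers;
    unsigned NextNum = 0;
    for (BasicBlock &BB : F)
      BBNumbers[&BB] = NextNum++;

    // Build the merge sets once per function; build2() runs the TDMSC-II
    // variant, build() the TDMSC-I variant.
    MergeSets MS(DT, BBNumbers);
    MS.build2(&F.getEntryBlock());

    // Per promoted variable: the merge set of its reachable defining blocks is
    // a superset of the PHI placement blocks. get() asserts that every block
    // passed in is reachable from the entry block.
    SmallPtrSet<BasicBlock *, 32> DefBlocks;
    DefBlocks.insert(&F.getEntryBlock()); // stand-in for real defining blocks
    SmallVector<BasicBlock *, 32> PHIBlocks = MS.get(DefBlocks);
    (void)PHIBlocks;
  }

As in the run() changes above, the returned blocks still need to be intersected
with the live-in blocks before queueing PHI nodes, since a merge set can contain
blocks where the value is dead.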