diff --git a/llvm/include/llvm/ADT/GenericCycleImpl.h b/llvm/include/llvm/ADT/GenericCycleImpl.h --- a/llvm/include/llvm/ADT/GenericCycleImpl.h +++ b/llvm/include/llvm/ADT/GenericCycleImpl.h @@ -66,6 +66,44 @@ } } +template +auto GenericCycle::getCyclePreheader() const -> BlockT * { + BlockT *Predecessor = getCyclePredecessor(); + if (!Predecessor) + return nullptr; + + assert(isReducible() && "Cycle Predecessor must be in a reducible cycle!"); + + if (succ_size(Predecessor) != 1) + return nullptr; + + // Make sure we are allowed to hoist instructions into the predecessor. + if (!Predecessor->isLegalToHoistInto()) + return nullptr; + + return Predecessor; +} + +template +auto GenericCycle::getCyclePredecessor() const -> BlockT * { + if (!isReducible()) + return nullptr; + + BlockT *Out = nullptr; + + // Loop over the predecessors of the header node... + BlockT *Header = getHeader(); + for (const auto Pred : predecessors(Header)) { + if (!contains(Pred)) { + if (Out && Out != Pred) + return nullptr; + Out = Pred; + } + } + + return Out; +} + /// \brief Helper class for computing cycle information. template class GenericCycleInfoCompute { using BlockT = typename ContextT::BlockT; @@ -326,6 +364,18 @@ return nullptr; } +/// \brief get the depth for the cycle which containing a given block. +/// +/// \returns the depth for the innermost cycle containing \p Block or 0 if it is +/// not contained in any cycle. +template +unsigned GenericCycleInfo::getCycleDepth(const BlockT *Block) const { + CycleT *Cycle = getCycle(Block); + if (!Cycle) + return 0; + return Cycle->getDepth(); +} + /// \brief Validate the internal consistency of the cycle tree. 
/// /// Note that this does \em not check that cycles are really cycles in the CFG, diff --git a/llvm/include/llvm/ADT/GenericCycleInfo.h b/llvm/include/llvm/ADT/GenericCycleInfo.h --- a/llvm/include/llvm/ADT/GenericCycleInfo.h +++ b/llvm/include/llvm/ADT/GenericCycleInfo.h @@ -100,6 +100,10 @@ BlockT *getHeader() const { return Entries[0]; } + const SmallVectorImpl & getEntries() const { + return Entries; + } + /// \brief Return whether \p Block is an entry block of the cycle. bool isEntry(BlockT *Block) const { return is_contained(Entries, Block); } @@ -124,6 +128,16 @@ /// branched to. void getExitBlocks(SmallVectorImpl &TmpStorage) const; + /// Return the preheader block for this cycle. Pre-header is well-defined for + /// reducible cycle in docs/LoopTerminology.rst as: the only one entering + /// block and its only edge is to the entry block. Return null for irreducible + /// cycles. + BlockT *getCyclePreheader() const; + + /// If the cycle has exactly one entry with exactly one predecessor, return + /// it, otherwise return nullptr. + BlockT *getCyclePredecessor() const; + /// Iteration over child cycles. //@{ using const_child_iterator_base = @@ -239,6 +253,7 @@ const ContextT &getSSAContext() const { return Context; } CycleT *getCycle(const BlockT *Block) const; + unsigned getCycleDepth(const BlockT *Block) const; CycleT *getTopLevelParentCycle(const BlockT *Block) const; /// Move \p Child to \p NewParent by manipulating Children vectors. 
diff --git a/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h b/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h --- a/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h +++ b/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h @@ -16,6 +16,8 @@ #include "llvm/ADT/GenericCycleInfo.h" #include "llvm/CodeGen/MachineSSAContext.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/InitializePasses.h" namespace llvm { @@ -25,6 +27,41 @@ using MachineCycleInfo = GenericCycleInfo; using MachineCycle = MachineCycleInfo::CycleT; +/// Legacy analysis pass which computes a \ref MachineCycleInfo. +class MachineCycleInfoWrapperPass : public MachineFunctionPass { + MachineFunction *F = nullptr; + MachineCycleInfo CI; + +public: + static char ID; + + MachineCycleInfoWrapperPass(); + + MachineCycleInfo &getCycleInfo() { return CI; } + const MachineCycleInfo &getCycleInfo() const { return CI; } + + bool runOnMachineFunction(MachineFunction &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + void releaseMemory() override; + void print(raw_ostream &OS, const Module *M = nullptr) const override; + + // TODO: verify analysis +}; + +class MachineCycleInfoPrinterPass : public MachineFunctionPass { +public: + static char ID; + + MachineCycleInfoPrinterPass(); + + bool runOnMachineFunction(MachineFunction &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + +// TODO: add this function to GenericCycle template after implementing IR +// version. 
+bool isCycleInvariant(const MachineCycle *Cycle, MachineInstr &I); + } // end namespace llvm #endif // LLVM_CODEGEN_MACHINECYCLEANALYSIS_H diff --git a/llvm/include/llvm/CodeGen/MachineSSAContext.h b/llvm/include/llvm/CodeGen/MachineSSAContext.h --- a/llvm/include/llvm/CodeGen/MachineSSAContext.h +++ b/llvm/include/llvm/CodeGen/MachineSSAContext.h @@ -28,6 +28,8 @@ inline auto successors(MachineBasicBlock *BB) { return BB->successors(); } inline auto predecessors(MachineBasicBlock *BB) { return BB->predecessors(); } +inline unsigned succ_size(MachineBasicBlock *BB) { return BB->succ_size(); } +inline unsigned pred_size(MachineBasicBlock *BB) { return BB->pred_size(); } template <> class GenericSSAContext { const MachineRegisterInfo *RegInfo = nullptr; diff --git a/llvm/lib/CodeGen/MachineCycleAnalysis.cpp b/llvm/lib/CodeGen/MachineCycleAnalysis.cpp --- a/llvm/lib/CodeGen/MachineCycleAnalysis.cpp +++ b/llvm/lib/CodeGen/MachineCycleAnalysis.cpp @@ -8,50 +8,15 @@ #include "llvm/CodeGen/MachineCycleAnalysis.h" #include "llvm/ADT/GenericCycleImpl.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineSSAContext.h" -#include "llvm/InitializePasses.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" using namespace llvm; template class llvm::GenericCycleInfo; template class llvm::GenericCycle; -namespace { - -/// Legacy analysis pass which computes a \ref MachineCycleInfo. 
-class MachineCycleInfoWrapperPass : public MachineFunctionPass { - MachineFunction *F = nullptr; - MachineCycleInfo CI; - -public: - static char ID; - - MachineCycleInfoWrapperPass(); - - MachineCycleInfo &getCycleInfo() { return CI; } - const MachineCycleInfo &getCycleInfo() const { return CI; } - - bool runOnMachineFunction(MachineFunction &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const override; - void releaseMemory() override; - void print(raw_ostream &OS, const Module *M = nullptr) const override; - - // TODO: verify analysis -}; - -class MachineCycleInfoPrinterPass : public MachineFunctionPass { -public: - static char ID; - - MachineCycleInfoPrinterPass(); - - bool runOnMachineFunction(MachineFunction &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const override; -}; - -} // namespace - char MachineCycleInfoWrapperPass::ID = 0; MachineCycleInfoWrapperPass::MachineCycleInfoWrapperPass() @@ -111,3 +76,62 @@ CI.print(errs()); return false; } + +bool llvm::isCycleInvariant(const MachineCycle *Cycle, MachineInstr &I) { + MachineFunction *MF = I.getParent()->getParent(); + MachineRegisterInfo *MRI = &MF->getRegInfo(); + const TargetSubtargetInfo &ST = MF->getSubtarget(); + const TargetRegisterInfo *TRI = ST.getRegisterInfo(); + const TargetInstrInfo *TII = ST.getInstrInfo(); + + // The instruction is cycle invariant if all of its operands are. + for (const MachineOperand &MO : I.operands()) { + if (!MO.isReg()) + continue; + + Register Reg = MO.getReg(); + if (Reg == 0) + continue; + + // An instruction that uses or defines a physical register can't e.g. be + // hoisted, so mark this as not invariant. + if (Register::isPhysicalRegister(Reg)) { + if (MO.isUse()) { + // If the physreg has no defs anywhere, it's just an ambient register + // and we can freely move its uses. Alternatively, if it's allocatable, + // it could get allocated to something with a def during allocation. 
+ // However, if the physreg is known to always be caller saved/restored + // then this use is safe to hoist. + if (!MRI->isConstantPhysReg(Reg) && + !(TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *I.getMF())) && + !TII->isIgnorableUse(MO)) + return false; + // Otherwise it's safe to move. + continue; + } else if (!MO.isDead()) { + // A def that isn't dead can't be moved. + return false; + } else if (any_of(Cycle->getEntries(), + [&](const MachineBasicBlock *Block) { + return Block->isLiveIn(Reg); + })) { + // If the reg is live into any header of the cycle we can't hoist an + // instruction which would clobber it. + return false; + } + } + + if (!MO.isUse()) + continue; + + assert(MRI->getVRegDef(Reg) && "Machine instr not mapped for this vreg?!"); + + // If the cycle contains the definition of an operand, then the instruction + // isn't cycle invariant. + if (Cycle->contains(MRI->getVRegDef(Reg)->getParent())) + return false; + } + + // If we got this far, the instruction is cycle invariant! 
+ return true; +} diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -29,6 +29,7 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineCycleAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -95,18 +96,18 @@ cl::init(20), cl::Hidden); static cl::opt -SinkInstsIntoLoop("sink-insts-to-avoid-spills", - cl::desc("Sink instructions into loops to avoid " - "register spills"), - cl::init(false), cl::Hidden); - -static cl::opt SinkIntoLoopLimit( - "machine-sink-loop-limit", - cl::desc("The maximum number of instructions considered for loop sinking."), + SinkInstsIntoCycle("sink-insts-to-avoid-spills", + cl::desc("Sink instructions into cycles to avoid " + "register spills"), + cl::init(false), cl::Hidden); + +static cl::opt SinkIntoCycleLimit( + "machine-sink-cycle-limit", + cl::desc("The maximum number of instructions considered for cycle sinking."), cl::init(50), cl::Hidden); STATISTIC(NumSunk, "Number of machine instructions sunk"); -STATISTIC(NumLoopSunk, "Number of machine instructions sunk into a loop"); +STATISTIC(NumCycleSunk, "Number of machine instructions sunk into a cycle"); STATISTIC(NumSplit, "Number of critical edges split"); STATISTIC(NumCoalesces, "Number of copies coalesced"); STATISTIC(NumPostRACopySink, "Number of copies sunk after RA"); @@ -119,7 +120,7 @@ MachineRegisterInfo *MRI; // Machine register information MachineDominatorTree *DT; // Machine dominator tree MachinePostDominatorTree *PDT; // Machine post dominator tree - MachineLoopInfo *LI; + MachineCycleInfo *CI; MachineBlockFrequencyInfo *MBFI; const MachineBranchProbabilityInfo *MBPI; AliasAnalysis *AA; @@ -180,8 +181,9 @@ AU.addRequired(); AU.addRequired(); 
AU.addRequired(); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); + AU.addPreserved(); AU.addPreserved(); if (UseBlockFreqInfo) AU.addRequired(); @@ -232,9 +234,9 @@ MachineBasicBlock *FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB, bool &BreakPHIEdge, AllSuccsCache &AllSuccessors); - void FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB, - SmallVectorImpl &Candidates); - bool SinkIntoLoop(MachineLoop *L, MachineInstr &I); + void FindCycleSinkCandidates(MachineCycle *Cycle, MachineBasicBlock *BB, + SmallVectorImpl &Candidates); + bool SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I); bool isProfitableToSinkTo(Register Reg, MachineInstr &MI, MachineBasicBlock *MBB, @@ -261,7 +263,7 @@ "Machine code sinking", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(MachineSinking, DEBUG_TYPE, "Machine code sinking", false, false) @@ -378,26 +380,27 @@ return false; } -void MachineSinking::FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB, +void MachineSinking::FindCycleSinkCandidates( + MachineCycle *Cycle, MachineBasicBlock *BB, SmallVectorImpl &Candidates) { for (auto &MI : *BB) { - LLVM_DEBUG(dbgs() << "LoopSink: Analysing candidate: " << MI); + LLVM_DEBUG(dbgs() << "CycleSink: Analysing candidate: " << MI); if (!TII->shouldSink(MI)) { - LLVM_DEBUG(dbgs() << "LoopSink: Instruction not a candidate for this " + LLVM_DEBUG(dbgs() << "CycleSink: Instruction not a candidate for this " "target\n"); continue; } - if (!L->isLoopInvariant(MI)) { - LLVM_DEBUG(dbgs() << "LoopSink: Instruction is not loop invariant\n"); + if (!isCycleInvariant(Cycle, MI)) { + LLVM_DEBUG(dbgs() << "CycleSink: Instruction is not cycle invariant\n"); continue; } bool DontMoveAcrossStore = true; if (!MI.isSafeToMove(AA, 
DontMoveAcrossStore)) { - LLVM_DEBUG(dbgs() << "LoopSink: Instruction not safe to move.\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Instruction not safe to move.\n"); continue; } if (MI.mayLoad() && !mayLoadFromGOTOrConstantPool(MI)) { - LLVM_DEBUG(dbgs() << "LoopSink: Dont sink GOT or constant pool loads\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Dont sink GOT or constant pool loads\n"); continue; } if (MI.isConvergent()) @@ -409,7 +412,7 @@ if (!MRI->hasOneDef(MO.getReg())) continue; - LLVM_DEBUG(dbgs() << "LoopSink: Instruction added as candidate.\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Instruction added as candidate.\n"); Candidates.push_back(&MI); } } @@ -425,22 +428,12 @@ MRI = &MF.getRegInfo(); DT = &getAnalysis(); PDT = &getAnalysis(); - LI = &getAnalysis(); + CI = &getAnalysis().getCycleInfo(); MBFI = UseBlockFreqInfo ? &getAnalysis() : nullptr; MBPI = &getAnalysis(); AA = &getAnalysis().getAAResults(); RegClassInfo.runOnMachineFunction(MF); - // MachineSink currently uses MachineLoopInfo, which only recognizes natural - // loops. As such, we could sink instructions into irreducible cycles, which - // would be non-profitable. - // WARNING: The current implementation of hasStoreBetween() is incorrect for - // sinking into irreducible cycles (PR53990), this bailout is currently - // necessary for correctness, not just profitability. 
- ReversePostOrderTraversal RPOT(&*MF.begin()); - if (containsIrreducibleCFG(RPOT, *LI)) - return false; - bool EverMadeChange = false; while (true) { @@ -473,32 +466,33 @@ EverMadeChange = true; } - if (SinkInstsIntoLoop) { - SmallVector Loops(LI->begin(), LI->end()); - for (auto *L : Loops) { - MachineBasicBlock *Preheader = LI->findLoopPreheader(L); + if (SinkInstsIntoCycle) { + SmallVector Cycles(CI->toplevel_begin(), + CI->toplevel_end()); + for (auto *Cycle : Cycles) { + MachineBasicBlock *Preheader = Cycle->getCyclePreheader(); if (!Preheader) { - LLVM_DEBUG(dbgs() << "LoopSink: Can't find preheader\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Can't find preheader\n"); continue; } SmallVector Candidates; - FindLoopSinkCandidates(L, Preheader, Candidates); + FindCycleSinkCandidates(Cycle, Preheader, Candidates); // Walk the candidates in reverse order so that we start with the use // of a def-use chain, if there is any. // TODO: Sort the candidates using a cost-model. unsigned i = 0; for (MachineInstr *I : llvm::reverse(Candidates)) { - if (i++ == SinkIntoLoopLimit) { - LLVM_DEBUG(dbgs() << "LoopSink: Limit reached of instructions to " + if (i++ == SinkIntoCycleLimit) { + LLVM_DEBUG(dbgs() << "CycleSink: Limit reached of instructions to " "be analysed."); break; } - if (!SinkIntoLoop(L, *I)) + if (!SinkIntoCycle(Cycle, *I)) break; EverMadeChange = true; - ++NumLoopSunk; + ++NumCycleSunk; } } } @@ -520,12 +514,12 @@ // Don't bother sinking code out of unreachable blocks. In addition to being // unprofitable, it can also lead to infinite looping, because in an - // unreachable loop there may be nowhere to stop. + // unreachable cycle there may be nowhere to stop. if (!DT->isReachableFromEntry(&MBB)) return false; bool MadeChange = false; - // Cache all successors, sorted by frequency info and loop depth. + // Cache all successors, sorted by frequency info and cycle depth. AllSuccsCache AllSuccessors; // Walk the basic block bottom-up. Remember if we saw a store. 
@@ -644,13 +638,16 @@ if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB)) return false; - // Avoid breaking back edge. From == To means backedge for single BB loop. + // Avoid breaking back edge. From == To means backedge for single BB cycle. if (!SplitEdges || FromBB == ToBB) return false; - // Check for backedges of more "complex" loops. - if (LI->getLoopFor(FromBB) == LI->getLoopFor(ToBB) && - LI->isLoopHeader(ToBB)) + MachineCycle *FromCycle = CI->getCycle(FromBB); + MachineCycle *ToCycle = CI->getCycle(ToBB); + + // Check for backedges of more "complex" cycles. + if (FromCycle == ToCycle && FromCycle && + (!FromCycle->isReducible() || FromCycle->getHeader() == ToBB)) return false; // It's not always legal to break critical edges and sink the computation @@ -753,9 +750,9 @@ if (!PDT->dominates(SuccToSinkTo, MBB)) return true; - // It is profitable to sink an instruction from a deeper loop to a shallower - // loop, even if the latter post-dominates the former (PR21115). - if (LI->getLoopDepth(MBB) > LI->getLoopDepth(SuccToSinkTo)) + // It is profitable to sink an instruction from a deeper cycle to a shallower + // cycle, even if the latter post-dominates the former (PR21115). + if (CI->getCycleDepth(MBB) > CI->getCycleDepth(SuccToSinkTo)) return true; // Check if only use in post dominated block is PHI instruction. @@ -776,11 +773,11 @@ FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge, AllSuccessors)) return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2, AllSuccessors); - MachineLoop *ML = LI->getLoopFor(MBB); + MachineCycle *MCycle = CI->getCycle(MBB); - // If the instruction is not inside a loop, it is not profitable to sink MI to + // If the instruction is not inside a cycle, it is not profitable to sink MI to // a post dominate block SuccToSinkTo. 
- if (!ML) + if (!MCycle) return false; auto isRegisterPressureSetExceedLimit = [&](const TargetRegisterClass *RC) { @@ -798,7 +795,7 @@ return false; }; - // If this instruction is inside a loop and sinking this instruction can make + // If this instruction is inside a cycle and sinking this instruction can make // more registers live range shorten, it is still prifitable. for (const MachineOperand &MO : MI.operands()) { // Ignore non-register operands. @@ -826,14 +823,15 @@ return false; } else { MachineInstr *DefMI = MRI->getVRegDef(Reg); - // DefMI is defined outside of loop. There should be no live range - // impact for this operand. Defination outside of loop means: - // 1: defination is outside of loop. - // 2: defination is in this loop, but it is a PHI in the loop header. - if (LI->getLoopFor(DefMI->getParent()) != ML || - (DefMI->isPHI() && LI->isLoopHeader(DefMI->getParent()))) + MachineCycle *Cycle = CI->getCycle(DefMI->getParent()); + // DefMI is defined outside of cycle. There should be no live range + // impact for this operand. Definition outside of cycle means: + // 1: definition is outside of cycle. + // 2: definition is in this cycle, but it is a PHI in the cycle header. + if (Cycle != MCycle || (DefMI->isPHI() && Cycle && Cycle->isReducible() && + Cycle->getHeader() == DefMI->getParent())) continue; - // The DefMI is defined inside the loop. + // The DefMI is defined inside the cycle. // If sinking this operand makes some register pressure set exceed limit, // it is not profitable. if (isRegisterPressureSetExceedLimit(MRI->getRegClass(Reg))) { @@ -843,8 +841,8 @@ } } - // If MI is in loop and all its operands are alive across the whole loop or if - // no operand sinking make register pressure set exceed limit, it is + // If MI is in cycle and all its operands are alive across the whole cycle or + // if no operand sinking make register pressure set exceed limit, it is // profitable to sink MI. 
return true; } @@ -876,14 +874,14 @@ AllSuccs.push_back(DTChild->getBlock()); } - // Sort Successors according to their loop depth or block frequency info. + // Sort Successors according to their cycle depth or block frequency info. llvm::stable_sort( AllSuccs, [this](const MachineBasicBlock *L, const MachineBasicBlock *R) { uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0; uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0; bool HasBlockFreq = LHSFreq != 0 && RHSFreq != 0; return HasBlockFreq ? LHSFreq < RHSFreq - : LI->getLoopDepth(L) < LI->getLoopDepth(R); + : CI->getCycleDepth(L) < CI->getCycleDepth(R); }); auto it = AllSuccessors.insert(std::make_pair(MBB, AllSuccs)); @@ -898,7 +896,7 @@ AllSuccsCache &AllSuccessors) { assert (MBB && "Invalid MachineBasicBlock!"); - // Loop over all the operands of the specified instruction. If there is + // loop over all the operands of the specified instruction. If there is // anything we can't handle, bail out. // SuccToSinkTo - This is the successor to sink this instruction to, once we @@ -945,7 +943,7 @@ // Otherwise, we should look at all the successors and decide which one // we should sink to. If we have reliable block frequency information // (frequency != 0) available, give successors with smaller frequencies - // higher priority, otherwise prioritize smaller loop depths. + // higher priority, otherwise prioritize smaller cycle depths. for (MachineBasicBlock *SuccBlock : GetAllSortedSuccessors(MI, MBB, AllSuccessors)) { bool LocalUse = false; @@ -968,7 +966,7 @@ } // It is not possible to sink an instruction into its own block. This can - // happen with loops. + // happen with cycles. if (MBB == SuccToSinkTo) return nullptr; @@ -1222,68 +1220,70 @@ return HasAliasedStore; } -/// Sink instructions into loops if profitable. This especially tries to prevent -/// register spills caused by register pressure if there is little to no -/// overhead moving instructions into loops. 
-bool MachineSinking::SinkIntoLoop(MachineLoop *L, MachineInstr &I) { - LLVM_DEBUG(dbgs() << "LoopSink: Finding sink block for: " << I); - MachineBasicBlock *Preheader = L->getLoopPreheader(); - assert(Preheader && "Loop sink needs a preheader block"); +/// Sink instructions into cycles if profitable. This especially tries to +/// prevent register spills caused by register pressure if there is little to no +/// overhead moving instructions into cycles. +bool MachineSinking::SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I) { + LLVM_DEBUG(dbgs() << "CycleSink: Finding sink block for: " << I); + MachineBasicBlock *Preheader = Cycle->getCyclePreheader(); + assert(Preheader && "Cycle sink needs a preheader block"); MachineBasicBlock *SinkBlock = nullptr; bool CanSink = true; const MachineOperand &MO = I.getOperand(0); for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) { - LLVM_DEBUG(dbgs() << "LoopSink: Analysing use: " << MI); - if (!L->contains(&MI)) { - LLVM_DEBUG(dbgs() << "LoopSink: Use not in loop, can't sink.\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Analysing use: " << MI); + if (!Cycle->contains(MI.getParent())) { + LLVM_DEBUG(dbgs() << "CycleSink: Use not in cycle, can't sink.\n"); CanSink = false; break; } // FIXME: Come up with a proper cost model that estimates whether sinking - // the instruction (and thus possibly executing it on every loop + // the instruction (and thus possibly executing it on every cycle // iteration) is more expensive than a register. // For now assumes that copies are cheap and thus almost always worth it. 
if (!MI.isCopy()) { - LLVM_DEBUG(dbgs() << "LoopSink: Use is not a copy\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Use is not a copy\n"); CanSink = false; break; } if (!SinkBlock) { SinkBlock = MI.getParent(); - LLVM_DEBUG(dbgs() << "LoopSink: Setting sink block to: " + LLVM_DEBUG(dbgs() << "CycleSink: Setting sink block to: " << printMBBReference(*SinkBlock) << "\n"); continue; } SinkBlock = DT->findNearestCommonDominator(SinkBlock, MI.getParent()); if (!SinkBlock) { - LLVM_DEBUG(dbgs() << "LoopSink: Can't find nearest dominator\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Can't find nearest dominator\n"); CanSink = false; break; } - LLVM_DEBUG(dbgs() << "LoopSink: Setting nearest common dom block: " << + LLVM_DEBUG(dbgs() << "CycleSink: Setting nearest common dom block: " << printMBBReference(*SinkBlock) << "\n"); } if (!CanSink) { - LLVM_DEBUG(dbgs() << "LoopSink: Can't sink instruction.\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Can't sink instruction.\n"); return false; } if (!SinkBlock) { - LLVM_DEBUG(dbgs() << "LoopSink: Not sinking, can't find sink block.\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Not sinking, can't find sink block.\n"); return false; } if (SinkBlock == Preheader) { - LLVM_DEBUG(dbgs() << "LoopSink: Not sinking, sink block is the preheader\n"); + LLVM_DEBUG( + dbgs() << "CycleSink: Not sinking, sink block is the preheader\n"); return false; } if (SinkBlock->size() > SinkLoadInstsPerBlockThreshold) { - LLVM_DEBUG(dbgs() << "LoopSink: Not Sinking, block too large to analyse.\n"); + LLVM_DEBUG( + dbgs() << "CycleSink: Not Sinking, block too large to analyse.\n"); return false; } - LLVM_DEBUG(dbgs() << "LoopSink: Sinking instruction!\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Sinking instruction!\n"); SinkBlock->splice(SinkBlock->SkipPHIsAndLabels(SinkBlock->begin()), Preheader, I); @@ -1407,9 +1407,11 @@ TryBreak = true; } - // Don't sink instructions into a loop. 
- if (!TryBreak && LI->isLoopHeader(SuccToSinkTo)) { - LLVM_DEBUG(dbgs() << " *** NOTE: Loop header found\n"); + // Don't sink instructions into a cycle. + if (!TryBreak && CI->getCycle(SuccToSinkTo) && + (!CI->getCycle(SuccToSinkTo)->isReducible() || + CI->getCycle(SuccToSinkTo)->getHeader() == SuccToSinkTo)) { + LLVM_DEBUG(dbgs() << " *** NOTE: cycle header found\n"); TryBreak = true; } diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll --- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll @@ -132,6 +132,7 @@ ; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Machine Common Subexpression Elimination ; CHECK-NEXT: MachinePostDominator Tree Construction +; CHECK-NEXT: Machine Cycle Info Analysis ; CHECK-NEXT: Machine code sinking ; CHECK-NEXT: Peephole Optimizations ; CHECK-NEXT: Remove dead machine instructions diff --git a/llvm/test/CodeGen/AArch64/loop-sink-limit.mir b/llvm/test/CodeGen/AArch64/loop-sink-limit.mir --- a/llvm/test/CodeGen/AArch64/loop-sink-limit.mir +++ b/llvm/test/CodeGen/AArch64/loop-sink-limit.mir @@ -1,10 +1,10 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple aarch64 -run-pass=machine-sink -sink-insts-to-avoid-spills \ -# RUN: -machine-sink-loop-limit=1 -verify-machineinstrs %s -o - 2>&1 | \ +# RUN: -machine-sink-cycle-limit=1 -verify-machineinstrs %s -o - 2>&1 | \ # RUN: FileCheck %s --check-prefix=SINK1 # # RUN: llc -mtriple aarch64 -run-pass=machine-sink -sink-insts-to-avoid-spills \ -# RUN: -machine-sink-loop-limit=2 -verify-machineinstrs %s -o - 2>&1 | \ +# RUN: -machine-sink-cycle-limit=2 -verify-machineinstrs %s -o - 2>&1 | \ # RUN: FileCheck %s --check-prefix=SINK2 --- | diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -296,6 +296,7 @@ ; 
GCN-O1-NEXT: Machine Block Frequency Analysis ; GCN-O1-NEXT: Machine Common Subexpression Elimination ; GCN-O1-NEXT: MachinePostDominator Tree Construction +; GCN-O1-NEXT: Machine Cycle Info Analysis ; GCN-O1-NEXT: Machine code sinking ; GCN-O1-NEXT: Peephole Optimizations ; GCN-O1-NEXT: Remove dead machine instructions @@ -574,6 +575,7 @@ ; GCN-O1-OPTS-NEXT: Machine Block Frequency Analysis ; GCN-O1-OPTS-NEXT: Machine Common Subexpression Elimination ; GCN-O1-OPTS-NEXT: MachinePostDominator Tree Construction +; GCN-O1-OPTS-NEXT: Machine Cycle Info Analysis ; GCN-O1-OPTS-NEXT: Machine code sinking ; GCN-O1-OPTS-NEXT: Peephole Optimizations ; GCN-O1-OPTS-NEXT: Remove dead machine instructions @@ -861,6 +863,7 @@ ; GCN-O2-NEXT: Machine Block Frequency Analysis ; GCN-O2-NEXT: Machine Common Subexpression Elimination ; GCN-O2-NEXT: MachinePostDominator Tree Construction +; GCN-O2-NEXT: Machine Cycle Info Analysis ; GCN-O2-NEXT: Machine code sinking ; GCN-O2-NEXT: Peephole Optimizations ; GCN-O2-NEXT: Remove dead machine instructions @@ -1161,6 +1164,7 @@ ; GCN-O3-NEXT: Machine Block Frequency Analysis ; GCN-O3-NEXT: Machine Common Subexpression Elimination ; GCN-O3-NEXT: MachinePostDominator Tree Construction +; GCN-O3-NEXT: Machine Cycle Info Analysis ; GCN-O3-NEXT: Machine code sinking ; GCN-O3-NEXT: Peephole Optimizations ; GCN-O3-NEXT: Remove dead machine instructions diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll --- a/llvm/test/CodeGen/ARM/O3-pipeline.ll +++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll @@ -91,6 +91,7 @@ ; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Machine Common Subexpression Elimination ; CHECK-NEXT: MachinePostDominator Tree Construction +; CHECK-NEXT: Machine Cycle Info Analysis ; CHECK-NEXT: Machine code sinking ; CHECK-NEXT: Peephole Optimizations ; CHECK-NEXT: Remove dead machine instructions diff --git a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll 
--- a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll +++ b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll @@ -111,6 +111,7 @@ ; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Machine Common Subexpression Elimination ; CHECK-NEXT: MachinePostDominator Tree Construction +; CHECK-NEXT: Machine Cycle Info Analysis ; CHECK-NEXT: Machine code sinking ; CHECK-NEXT: Peephole Optimizations ; CHECK-NEXT: Remove dead machine instructions diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -90,6 +90,7 @@ ; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Machine Common Subexpression Elimination ; CHECK-NEXT: MachinePostDominator Tree Construction +; CHECK-NEXT: Machine Cycle Info Analysis ; CHECK-NEXT: Machine code sinking ; CHECK-NEXT: Peephole Optimizations ; CHECK-NEXT: Remove dead machine instructions diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll --- a/llvm/test/CodeGen/X86/opt-pipeline.ll +++ b/llvm/test/CodeGen/X86/opt-pipeline.ll @@ -107,6 +107,7 @@ ; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Machine Common Subexpression Elimination ; CHECK-NEXT: MachinePostDominator Tree Construction +; CHECK-NEXT: Machine Cycle Info Analysis ; CHECK-NEXT: Machine code sinking ; CHECK-NEXT: Peephole Optimizations ; CHECK-NEXT: Remove dead machine instructions diff --git a/llvm/test/CodeGen/X86/pr38795.ll b/llvm/test/CodeGen/X86/pr38795.ll --- a/llvm/test/CodeGen/X86/pr38795.ll +++ b/llvm/test/CodeGen/X86/pr38795.ll @@ -32,14 +32,13 @@ ; CHECK-NEXT: # implicit-def: $ebp ; CHECK-NEXT: jmp .LBB0_1 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_15: # %for.inc +; CHECK-NEXT: .LBB0_16: # %for.inc ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: movl %esi, %ecx ; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: movb %dh, %dl ; CHECK-NEXT: .LBB0_1: 
# %for.cond ; CHECK-NEXT: # =>This Loop Header: Depth=1 -; CHECK-NEXT: # Child Loop BB0_19 Depth 2 +; CHECK-NEXT: # Child Loop BB0_20 Depth 2 ; CHECK-NEXT: cmpb $8, %dl ; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: ja .LBB0_3 @@ -56,7 +55,7 @@ ; CHECK-NEXT: movb %cl, %dh ; CHECK-NEXT: movl $0, h ; CHECK-NEXT: cmpb $8, %dl -; CHECK-NEXT: jg .LBB0_9 +; CHECK-NEXT: jg .LBB0_8 ; CHECK-NEXT: # %bb.5: # %if.then13 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl %eax, %esi @@ -65,10 +64,12 @@ ; CHECK-NEXT: calll printf ; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dh # 1-byte Reload ; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: movl %esi, %ecx ; CHECK-NEXT: # implicit-def: $eax -; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload +; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: movb %dh, %dl -; CHECK-NEXT: jne .LBB0_15 +; CHECK-NEXT: jne .LBB0_16 ; CHECK-NEXT: jmp .LBB0_6 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_3: # %if.then @@ -77,82 +78,82 @@ ; CHECK-NEXT: calll printf ; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload ; CHECK-NEXT: # implicit-def: $eax -; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; CHECK-NEXT: jmp .LBB0_6 -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_9: # %if.end21 -; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: # implicit-def: $ebp -; CHECK-NEXT: jmp .LBB0_10 -; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_6: # %for.cond35 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: movb %dl, %dh ; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: movl %edi, %esi -; CHECK-NEXT: movl $0, %edi -; CHECK-NEXT: movb %cl, %dl -; CHECK-NEXT: je .LBB0_19 -; CHECK-NEXT: # %bb.7: # %af +; CHECK-NEXT: je .LBB0_7 +; CHECK-NEXT: .LBB0_11: # %af ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_8 
-; CHECK-NEXT: .LBB0_16: # %if.end39 +; CHECK-NEXT: jne .LBB0_12 +; CHECK-NEXT: .LBB0_17: # %if.end39 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: je .LBB0_18 -; CHECK-NEXT: # %bb.17: # %if.then41 +; CHECK-NEXT: je .LBB0_19 +; CHECK-NEXT: # %bb.18: # %if.then41 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $fn, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $.str, (%esp) ; CHECK-NEXT: calll printf -; CHECK-NEXT: .LBB0_18: # %for.end46 +; CHECK-NEXT: .LBB0_19: # %for.end46 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: movl %esi, %edi ; CHECK-NEXT: # implicit-def: $dl ; CHECK-NEXT: # implicit-def: $dh ; CHECK-NEXT: # implicit-def: $ebp +; CHECK-NEXT: jmp .LBB0_20 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_8: # %if.end21 +; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: # implicit-def: $ebp +; CHECK-NEXT: jmp .LBB0_9 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_19: # %for.cond47 +; CHECK-NEXT: .LBB0_7: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: movb %dl, %dh +; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_20: # %for.cond47 ; CHECK-NEXT: # Parent Loop BB0_1 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_19 -; CHECK-NEXT: # %bb.20: # %for.cond47 -; CHECK-NEXT: # in Loop: Header=BB0_19 Depth=2 +; CHECK-NEXT: jne .LBB0_20 +; CHECK-NEXT: # %bb.21: # %for.cond47 +; CHECK-NEXT: # in Loop: Header=BB0_20 Depth=2 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_19 -; CHECK-NEXT: .LBB0_10: # %ae +; CHECK-NEXT: jne .LBB0_20 +; CHECK-NEXT: .LBB0_9: # %ae ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_11 -; CHECK-NEXT: # %bb.12: # %if.end26 +; CHECK-NEXT: jne .LBB0_10 +; CHECK-NEXT: # %bb.13: # %if.end26 ; CHECK-NEXT: # 
in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: je .LBB0_15 -; CHECK-NEXT: # %bb.13: # %if.end26 +; CHECK-NEXT: je .LBB0_16 +; CHECK-NEXT: # %bb.14: # %if.end26 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testl %ebp, %ebp -; CHECK-NEXT: jne .LBB0_15 -; CHECK-NEXT: # %bb.14: # %if.then31 +; CHECK-NEXT: jne .LBB0_16 +; CHECK-NEXT: # %bb.15: # %if.then31 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: xorl %ebp, %ebp -; CHECK-NEXT: jmp .LBB0_15 +; CHECK-NEXT: jmp .LBB0_16 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_11: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: movl %edi, %esi +; CHECK-NEXT: .LBB0_10: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: # implicit-def: $eax ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: je .LBB0_16 -; CHECK-NEXT: .LBB0_8: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: je .LBB0_17 +; CHECK-NEXT: .LBB0_12: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: # implicit-def: $edi ; CHECK-NEXT: # implicit-def: $cl +; CHECK-NEXT: # kill: killed $cl ; CHECK-NEXT: # implicit-def: $dl ; CHECK-NEXT: # implicit-def: $ebp -; CHECK-NEXT: jmp .LBB0_6 +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: jne .LBB0_11 +; CHECK-NEXT: jmp .LBB0_7 entry: br label %for.cond diff --git a/llvm/test/CodeGen/X86/switch-phi-const.ll b/llvm/test/CodeGen/X86/switch-phi-const.ll --- a/llvm/test/CodeGen/X86/switch-phi-const.ll +++ b/llvm/test/CodeGen/X86/switch-phi-const.ll @@ -92,37 +92,37 @@ ; CHECK-LABEL: switch_trunc_phi_const: ; CHECK: # %bb.0: # %bb0 ; CHECK-NEXT: movzbl %dil, %r8d -; CHECK-NEXT: movl $3895, %ecx # imm = 0xF37 -; CHECK-NEXT: movl $42, %esi -; CHECK-NEXT: movl $13, %edx -; CHECK-NEXT: movl $5, %edi -; CHECK-NEXT: movl $1, %eax ; CHECK-NEXT: decl %r8d ; CHECK-NEXT: cmpl $54, %r8d ; CHECK-NEXT: ja .LBB1_8 ; CHECK-NEXT: # %bb.1: # %bb0 +; CHECK-NEXT: movl $3895, %edx # imm = 0xF37 
+; CHECK-NEXT: movl $42, %edi +; CHECK-NEXT: movl $13, %esi +; CHECK-NEXT: movl $5, %ecx +; CHECK-NEXT: movl $1, %eax ; CHECK-NEXT: jmpq *.LJTI1_0(,%r8,8) ; CHECK-NEXT: .LBB1_8: # %default ; CHECK-NEXT: retq ; CHECK-NEXT: .LBB1_2: # %case_1_loop ; CHECK-NEXT: movq effect64@GOTPCREL(%rip), %rcx ; CHECK-NEXT: movq $1, (%rcx) -; CHECK-NEXT: movq %rax, %rdi +; CHECK-NEXT: movq %rax, %rcx ; CHECK-NEXT: .LBB1_3: # %case_5 ; CHECK-NEXT: movq effect64@GOTPCREL(%rip), %rax ; CHECK-NEXT: movq $5, (%rax) -; CHECK-NEXT: movq %rdi, %rdx +; CHECK-NEXT: movq %rcx, %rsi ; CHECK-NEXT: .LBB1_4: # %case_13 ; CHECK-NEXT: movq effect64@GOTPCREL(%rip), %rax ; CHECK-NEXT: movq $13, (%rax) -; CHECK-NEXT: movq %rdx, %rsi +; CHECK-NEXT: movq %rsi, %rdi ; CHECK-NEXT: .LBB1_5: # %case_42 ; CHECK-NEXT: movq effect64@GOTPCREL(%rip), %rax -; CHECK-NEXT: movq %rsi, (%rax) -; CHECK-NEXT: movl $55, %ecx +; CHECK-NEXT: movq %rdi, (%rax) +; CHECK-NEXT: movl $55, %edx ; CHECK-NEXT: .LBB1_6: # %case_55 ; CHECK-NEXT: movq effect64@GOTPCREL(%rip), %rax -; CHECK-NEXT: movq %rcx, (%rax) +; CHECK-NEXT: movq %rdx, (%rax) ; CHECK-NEXT: .LBB1_7: # %case_7 ; CHECK-NEXT: movq g64@GOTPCREL(%rip), %rax ; CHECK-NEXT: movq (%rax), %rax diff --git a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll --- a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll +++ b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll @@ -1377,8 +1377,6 @@ ; ENABLE-NEXT: pushq %rbx ; ENABLE-NEXT: pushq %rax ; ENABLE-NEXT: .cfi_offset %rbx, -24 -; ENABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax -; ENABLE-NEXT: movl (%rax), %edi ; ENABLE-NEXT: movq _irreducibleCFGf@GOTPCREL(%rip), %rax ; ENABLE-NEXT: cmpb $0, (%rax) ; ENABLE-NEXT: je LBB16_2 @@ -1388,20 +1386,24 @@ ; ENABLE-NEXT: jmp LBB16_1 ; ENABLE-NEXT: LBB16_2: ## %split ; ENABLE-NEXT: movq _irreducibleCFGb@GOTPCREL(%rip), %rax -; ENABLE-NEXT: xorl %ebx, %ebx ; ENABLE-NEXT: cmpl $0, (%rax) -; ENABLE-NEXT: je LBB16_4 -; ENABLE-NEXT: ## %bb.3: ## 
%for.body4.i +; ENABLE-NEXT: je LBB16_3 +; ENABLE-NEXT: ## %bb.4: ## %for.body4.i +; ENABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax +; ENABLE-NEXT: movl (%rax), %edi ; ENABLE-NEXT: xorl %ebx, %ebx ; ENABLE-NEXT: xorl %eax, %eax ; ENABLE-NEXT: callq _something +; ENABLE-NEXT: jmp LBB16_5 +; ENABLE-NEXT: LBB16_3: +; ENABLE-NEXT: xorl %ebx, %ebx ; ENABLE-NEXT: .p2align 4, 0x90 -; ENABLE-NEXT: LBB16_4: ## %for.inc +; ENABLE-NEXT: LBB16_5: ## %for.inc ; ENABLE-NEXT: ## =>This Inner Loop Header: Depth=1 ; ENABLE-NEXT: incl %ebx ; ENABLE-NEXT: cmpl $7, %ebx -; ENABLE-NEXT: jl LBB16_4 -; ENABLE-NEXT: ## %bb.5: ## %fn1.exit +; ENABLE-NEXT: jl LBB16_5 +; ENABLE-NEXT: ## %bb.6: ## %fn1.exit ; ENABLE-NEXT: xorl %eax, %eax ; ENABLE-NEXT: addq $8, %rsp ; ENABLE-NEXT: popq %rbx @@ -1418,8 +1420,6 @@ ; DISABLE-NEXT: pushq %rbx ; DISABLE-NEXT: pushq %rax ; DISABLE-NEXT: .cfi_offset %rbx, -24 -; DISABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax -; DISABLE-NEXT: movl (%rax), %edi ; DISABLE-NEXT: movq _irreducibleCFGf@GOTPCREL(%rip), %rax ; DISABLE-NEXT: cmpb $0, (%rax) ; DISABLE-NEXT: je LBB16_2 @@ -1429,20 +1429,24 @@ ; DISABLE-NEXT: jmp LBB16_1 ; DISABLE-NEXT: LBB16_2: ## %split ; DISABLE-NEXT: movq _irreducibleCFGb@GOTPCREL(%rip), %rax -; DISABLE-NEXT: xorl %ebx, %ebx ; DISABLE-NEXT: cmpl $0, (%rax) -; DISABLE-NEXT: je LBB16_4 -; DISABLE-NEXT: ## %bb.3: ## %for.body4.i +; DISABLE-NEXT: je LBB16_3 +; DISABLE-NEXT: ## %bb.4: ## %for.body4.i +; DISABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax +; DISABLE-NEXT: movl (%rax), %edi ; DISABLE-NEXT: xorl %ebx, %ebx ; DISABLE-NEXT: xorl %eax, %eax ; DISABLE-NEXT: callq _something +; DISABLE-NEXT: jmp LBB16_5 +; DISABLE-NEXT: LBB16_3: +; DISABLE-NEXT: xorl %ebx, %ebx ; DISABLE-NEXT: .p2align 4, 0x90 -; DISABLE-NEXT: LBB16_4: ## %for.inc +; DISABLE-NEXT: LBB16_5: ## %for.inc ; DISABLE-NEXT: ## =>This Inner Loop Header: Depth=1 ; DISABLE-NEXT: incl %ebx ; DISABLE-NEXT: cmpl $7, %ebx -; DISABLE-NEXT: jl LBB16_4 -; 
DISABLE-NEXT: ## %bb.5: ## %fn1.exit +; DISABLE-NEXT: jl LBB16_5 +; DISABLE-NEXT: ## %bb.6: ## %fn1.exit ; DISABLE-NEXT: xorl %eax, %eax ; DISABLE-NEXT: addq $8, %rsp ; DISABLE-NEXT: popq %rbx