Index: include/llvm/CodeGen/AsmPrinter.h
===================================================================
--- include/llvm/CodeGen/AsmPrinter.h
+++ include/llvm/CodeGen/AsmPrinter.h
@@ -22,6 +22,7 @@
 #include "llvm/ADT/Twine.h"
 #include "llvm/CodeGen/DwarfStringPoolEntry.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -153,6 +154,24 @@
   /// maintains ownership of the emitters.
   SmallVector<HandlerInfo, 1> Handlers;
 
+  // FIXME: ShrinkWrap2: Find a way to emit CFI directives compatible with
+  // shrink-wrapping. We now emit .cfi_offset and .cfi_restore for saves and
+  // restores, we re-process them to see if the final layout needs more work or
+  // not based on the block order.
+
+  typedef DenseMap<unsigned, BitVector> CSRMap;
+
+  // FIXME: This shouldn't be here.
+  DenseMap<unsigned, unsigned> RegToCSRIdx;
+
+  // FIXME: ShrinkWrap2: Compute CFI save / restore directives based on the
+  // final layout.
+  CSRMap ExtraSaveCFI;
+  CSRMap ExtraRestoreCFI;
+
+  // FIXME: ShrinkWrap2: How does this work with stack shrink-wrapping. Is there
+  // a way to "restore" everything?
+
 public:
   struct SrcMgrDiagInfo {
     SourceMgr SrcMgr;
@@ -294,12 +313,14 @@
   void emitFrameAlloc(const MachineInstr &MI);
 
   enum CFIMoveType { CFI_M_None, CFI_M_EH, CFI_M_Debug };
-  CFIMoveType needsCFIMoves();
+  CFIMoveType needsCFIMoves() const; // FIXME: ShrinkWrap2: Separate commit.
 
   /// Returns false if needsCFIMoves() == CFI_M_EH for any function
   /// in the module.
   bool needsOnlyDebugCFIMoves() const { return isCFIMoveForDebugging; }
 
+  void generateShrinkWrappingCFI();
+
   bool needsSEHMoves();
 
   /// Print to the current output stream assembly representations of the
Index: include/llvm/CodeGen/MachineFrameInfo.h
===================================================================
--- include/llvm/CodeGen/MachineFrameInfo.h
+++ include/llvm/CodeGen/MachineFrameInfo.h
@@ -15,6 +15,9 @@
 #define LLVM_CODEGEN_MACHINEFRAMEINFO_H
 
 #include "llvm/ADT/SmallVector.h"
+// FIXME: ShrinkWrap2: Temporary hack. Remove.
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/Support/DataTypes.h"
 #include <cassert>
 #include <vector>
@@ -22,7 +25,6 @@
 namespace llvm {
 class raw_ostream;
 class MachineFunction;
-class MachineBasicBlock;
 class BitVector;
 class AllocaInst;
 
@@ -42,6 +44,17 @@
   void setFrameIdx(int FI)                       { FrameIdx = FI; }
 };
 
+/// Map a set of registers to a basic block. This is a replacement for CSInfo
+/// with extra information about the location of the saves / restores pinned
+/// to a basic block. One register may appear more than once in the map, as
+/// long as it is associated to a different basic block. The CSIs may share
+/// frame indexes for different registers, for different basic blocks.
+/// Similar to CSInfo, the frame indexes in the CalleeSavedInfo struct are
+/// valid ony if CSIValid is true.
+// FIXME: ShrinkWrap2: Make this a DenseMap<unsigned, BitVector>
+typedef DenseMap<MachineBasicBlock *, std::vector<CalleeSavedInfo>>
+    CalleeSavedMap;
+
 /// The MachineFrameInfo class represents an abstract stack frame until
 /// prolog/epilog code is inserted.  This class is key to allowing stack frame
 /// representation optimizations, such as frame pointer elimination.  It also
@@ -266,12 +279,22 @@
   /// stack objects like arguments so we can't treat them as immutable.
   bool HasTailCall = false;
 
+  // FIXME: ShrinkWrap2: Deprecate.
   /// Not null, if shrink-wrapping found a better place for the prologue.
   MachineBasicBlock *Save = nullptr;
   /// Not null, if shrink-wrapping found a better place for the epilogue.
   MachineBasicBlock *Restore = nullptr;
 
+private:
+  /// Should the PrologEpilogInserter and the various target hooks use the
+  /// information gathered from shrink-wrapping?
+  // FIXME: ShrinkWrap2: Fix name.
+  // FIXME: ShrinkWrap2: Merge shrink-wrapped / non-shrink-wrapped paths.
+  bool ShouldUseShrinkWrap2 = false;
+
 public:
+  // FIXME: ShrinkWrap2: Temporary hack. Remove.
+  RegScavenger *RS;
   explicit MachineFrameInfo(unsigned StackAlignment, bool StackRealignable,
                             bool ForcedRealign)
       : StackAlignment(StackAlignment), StackRealignable(StackRealignable),
@@ -658,11 +681,24 @@
 
   void setCalleeSavedInfoValid(bool v) { CSIValid = v; }
 
+  // FIXME: ShrinkWrap2: Merge with multiple points.
   MachineBasicBlock *getSavePoint() const { return Save; }
   void setSavePoint(MachineBasicBlock *NewSave) { Save = NewSave; }
   MachineBasicBlock *getRestorePoint() const { return Restore; }
   void setRestorePoint(MachineBasicBlock *NewRestore) { Restore = NewRestore; }
 
+  // FIXME: ShrinkWrap2: Is this the right place for this? This should be
+  // somewhere in PEI or TargetFrameLowering, since they are the only ones using
+  // it.
+  // FIXME: ShrinkWrap2: This gets really messy and we should merge all the
+  // behaviour for both shrink-wrapping passes and with it disabled.
+  // FIXME: ShrinkWrap2: Name.
+  // FIXME: ShrinkWrap2: Merge shrink-wrapped / non-shrink-wrapped paths.
+  bool getShouldUseShrinkWrap2() const { return ShouldUseShrinkWrap2; }
+  // FIXME: ShrinkWrap2: Name.
+  // FIXME: ShrinkWrap2: Merge shrink-wrapped / non-shrink-wrapped paths.
+  void setShouldUseShrinkWrap2(bool New) { ShouldUseShrinkWrap2 = New; }
+
   /// Return a set of physical registers that are pristine.
   ///
   /// Pristine registers hold a value that is useless to the current function,
Index: include/llvm/CodeGen/ShrinkWrapper.h
===================================================================
--- /dev/null
+++ include/llvm/CodeGen/ShrinkWrapper.h
@@ -0,0 +1,331 @@
+//===- llvm/CodeGen/ShrinkWrapper.h - Shrink Wrapping Utility ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This class is the main utility to provide shrink-wrapping properties to any
+// kind of attributes. This is used to do callee-saved registers and stack
+// shrink-wrapping. The algorithm is based on "Minimizing Register Usage Penalty
+// at Procedure Calls - Fred C. Chow" [1], with the usage of SCCs to exclude
+// loops and provide a linear pass instead of a complete dataflow analysis.
+// FIXME: ShrinkWrap2: Random thoughts:
+// - r193749 removed an old pass that was an implementation of [1].
+// - Cost model: use MachineBlockFrequency and some instruction cost model?
+// - Split critical edges on demand?
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SHRINKWRAP_H
+#define LLVM_CODEGEN_SHRINKWRAP_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/ADT/PostOrderIterator.h"
+
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+class MachineBlockFrequencyInfo;
+class MachineOptimizationRemarkEmitter;
+
+/// Information about the requirements on shrink-wrapping. This should describe
+/// what does "used" mean, and it should be the main interface to work with the
+/// targets and other shrink-wrappable inputs.
+class ShrinkWrapInfo {
+protected:
+  /// Track all the uses per basic block.
+  SmallVector<BitVector, 8> Uses;
+
+  /// The machine function we're shrink-wrapping.
+  const MachineFunction &MF;
+
+  /// Generic code to determine callee saved register uses. This checks for
+  /// regmasks, and tracks all the register units.
+  /// If there is an use on a terminator, the successors will also be marked as
+  /// used.
+  // FIXME: ShrinkWrap2: Make this a free-function outside shrink-wrapping.
+  void determineCSRUses();
+
+public:
+  ShrinkWrapInfo(const MachineFunction &MF)
+      : Uses(MF.getNumBlockIDs()), MF(MF) {}
+  /// Get the number of results we want per block. i.e. number of registers in
+  /// the target.
+  virtual unsigned getNumResultBits() const { return 0; }
+
+  /// Get the elements that are used for a particular basic block. The result is
+  /// `nullptr` if there are no uses.
+  virtual const BitVector *getUses(unsigned MBBNum) const;
+
+  /// Provide a way to print elements. Debug only.
+  // FIXME: ShrinkWrap2: Add DUMP macros.
+  virtual raw_ostream &printElt(unsigned Elt, raw_ostream &OS) const {
+    OS << Elt;
+    return OS;
+  };
+
+  virtual ~ShrinkWrapInfo() = default;
+};
+
+/// Iterator for successors / predecessors. This is here to work with
+/// SmallVector and std::vector at the same time.
+// FIXME: ShrinkWrap2: Use ArrayRef?
+typedef const MachineBasicBlock *const *MBBIterator;
+
+class ShrinkWrapper {
+  typedef BitVector MBBSet;
+  /// Result type used to store results / uses. The target decides the meaning
+  /// of the bits.
+  typedef BitVector TargetResultSet;
+  // Idx = MBB.getNumber()
+  typedef SmallVector<TargetResultSet, 8> BBResultSetMap;
+  typedef DenseMap<unsigned, TargetResultSet> SparseBBResultSetMap;
+
+  /// The shrink-wrapping analysis is based on two properties:
+  /// * Anticipation:
+  /// The use of a register is ancicipated at a given point if a use of the
+  /// register will be encountered in all possible execution paths leading from
+  /// that point.
+
+  /// * Availability:
+  /// The use of a register is available at a given point if a use of the
+  /// register has been encountered in all possible execution paths that lead to
+  /// that point.
+
+  /// Both attributes are propagated at the beginning and at the end of a block
+  /// (which could be an SCC, or a basic block).
+  // FIXME: ShrinkWrap2: Remove OUT/IN.
+  struct SWAttributes {
+    /// Is the element anticipated at the beginning of this block?
+    TargetResultSet ANTIN;
+    /// Is the element available at the end of this block?
+    TargetResultSet AVOUT;
+
+    /// Resize all the sets.
+    SWAttributes(const ShrinkWrapInfo &SWI) {
+      unsigned Max = SWI.getNumResultBits();
+      for (TargetResultSet *BV : {&ANTIN, &AVOUT})
+        (*BV).resize(Max);
+    }
+  };
+
+  // Idx = MBB.getNumber()
+  typedef SmallVector<SWAttributes, 4> AttributeMap;
+
+  /// An SCC that was discovered through the scc_iterator on the function.
+  /// This is used in order to detect loops, reducible *AND* irreducible.
+  struct SCCLoop {
+    typedef SmallVector<const MachineBasicBlock *, 4> MBBVector;
+    /// The successors of the SCC. These are blocks outside the SCC.
+    SetVector<const MachineBasicBlock *, MBBVector> Successors;
+    iterator_range<MBBIterator> successors() const {
+      return {&*Successors.begin(), &*Successors.end()};
+    }
+    /// The predecessors of the SCC. These are blocks outside the SCC.
+    SetVector<const MachineBasicBlock *, MBBVector> Predecessors;
+    iterator_range<MBBIterator> predecessors() const {
+      return {&*Predecessors.begin(), &*Predecessors.end()};
+    }
+    /// This number is the number of the first MBB in the SCC.
+    unsigned Number;
+    unsigned getNumber() const { return Number; }
+    /// The number of blocks the SCC contains.
+    unsigned Size;
+    unsigned getSize() const { return Size; }
+  };
+
+  /// Wrapper around scc_iterator that collects SCCs that are loops, computes
+  /// their successor / predecessor and assigns an unique number based on the
+  /// basic blocks it contains.
+  struct SCCLoopInfo {
+    /// Own the SCCs.
+    SmallVector<SCCLoop, 4> SCCs;
+    /// Map a basic block number to an SCCLoop number. The SCCLoop number is
+    /// the position in the `SCCs` vector, and it is differrent from the
+    /// SCCLoop::Number attribute, which is the first basic block's number in
+    /// the SCC.
+    DenseMap<unsigned, unsigned> MBBToSCC;
+
+    /// Initialize the successors / predecessors of the SCCLoops.
+    SCCLoopInfo(const MachineFunction &MF);
+    /// Get the SCCLoop for a designated basic block number. If there is no
+    /// SCCLoop associated, return `nullptr`.
+    SCCLoop *getSCCLoopFor(unsigned MBBNum) {
+      auto It = MBBToSCC.find(MBBNum);
+      if (It == MBBToSCC.end())
+        return nullptr;
+      return &SCCs[It->second];
+    }
+    const SCCLoop *getSCCLoopFor(unsigned MBBNum) const {
+      return const_cast<SCCLoopInfo *>(this)->getSCCLoopFor(MBBNum);
+    }
+  };
+
+  /// The MachineFunction we're working on.
+  const MachineFunction &MF;
+
+  /// Target-found uses.
+  // FIXME: ShrinkWrap2: Use the one from ShrinkWrapInfo, but detecting critical
+  // edges may need to modify it.
+  BBResultSetMap Uses;
+
+  // FIXME: ShrinkWrap2: Is this the correct place to compute this?
+  /// Blocks that never return.
+  MBBSet NoReturnBlocks;
+
+  /// Target-specific shrink-wrap information.
+  std::unique_ptr<ShrinkWrapInfo> SWI;
+
+  /// The replacement for the MachineLoopInfo, that handles irreducible loops
+  /// as well.
+  SCCLoopInfo SI;
+
+  /// Final results.
+  SparseBBResultSetMap Saves;
+  SparseBBResultSetMap Restores;
+
+  /// Number of times the attributes have been recomputed because of critical
+  /// edges.
+  unsigned AttributesRecomputed = 0;
+
+  /// All the elements encountered so far.
+  TargetResultSet AllElts;
+
+  /// The CFG we're working on is no longer composed of basic blocks. It's
+  /// basically the CFG of SCCs, and we're using numbers to identify nodes. A
+  /// simple basic block's number is MBB->getNumber(), and a SCC that is a
+  /// loop gets the number of the first basic block encountered. For that,
+  /// we're using the following functions to traverse our CFG.
+
+  /// Get the block number or the SCCLoop's number.
+  unsigned blockNumber(unsigned MBBNum) const;
+  /// Get the block successors or the SCCLoop exit blocks.
+  iterator_range<MBBIterator> blockSuccessors(unsigned MBBNum) const;
+  /// Get the block predecessors or the SCCLoop's predecessors.
+  iterator_range<MBBIterator> blockPredecessors(unsigned MBBNum) const;
+
+  /// Anticipability
+  // If there is an use of this on *all* the paths starting from
+  // this basic block, the element is anticipated at the end of this
+  // block.
+  // (propagate the IN attribute of successors to possibly merge saves)
+  //           -
+  //          | *false*             if no successor.
+  // ANTOUT = |
+  //          | && ANTIN(succ[i])   otherwise.
+  //
+  bool ANTOUT(const AttributeMap &Attrs, unsigned MBBNum, unsigned Elt) const {
+    auto Successors = blockSuccessors(MBBNum);
+    if (Successors.begin() == Successors.end())
+      return false;
+    return all_of(Successors, [&](const MachineBasicBlock *S) {
+      return Attrs[blockNumber(S->getNumber())].ANTIN.test(Elt);
+    });
+  }
+
+  /// Availability
+  // If there is an use of this on *all* the paths arriving in this block,
+  // then the element is available in this block (propagate the out attribute
+  // of predecessors to possibly merge restores).
+  //         -
+  //        | *false*             if no predecessor.
+  // AVIN = |
+  //        | && AVOUT(pred[i])   otherwise.
+  //         -
+  bool AVIN(const AttributeMap &Attrs, unsigned MBBNum, unsigned Elt) const {
+    auto Predecessors = blockPredecessors(MBBNum);
+    if (Predecessors.begin() == Predecessors.end())
+      return false;
+    return all_of(Predecessors, [&](const MachineBasicBlock *P) {
+      return Attrs[blockNumber(P->getNumber())].AVOUT.test(Elt);
+    });
+  }
+
+  /// Determine uses based on ShrinkWrapInfo.
+  // FIXME: ShrinkWrap2: Remove. Call SWI directly.
+  void determineUses();
+  /// Remove uses and fill NoReturnBlocks with the blocks that we know are not
+  /// going to return from the function.
+  /// FIXME: ShrinkWrap2: Is this the correct place to compute this?
+  void removeUsesOnNoReturnPaths();
+  void dumpUses() const;
+  /// Mark all the basic blocks / SCCs around a loop (pred, succ) as used,
+  /// if there is an usage of a CSR inside a loop. We want to avoid any save /
+  /// restore operations inside a loop.
+  void markUsesOutsideLoops();
+
+  /// Compute the attributes for one element.
+  // FIXME: ShrinkWrap2: Don't do this per element.
+  void computeAttributes(
+      unsigned Elt, AttributeMap &Attrs,
+      ReversePostOrderTraversal<const MachineFunction *> &RPOT) const;
+  /// Save the results for this particular element.
+  // FIXME: ShrinkWrap2: Don't do this per element.
+  void gatherAttributesResults(unsigned Elt, AttributeMap &Attrs);
+  /// Check for critical edges and mark new blocks as needed.
+  // FIXME: ShrinkWrap2: Don't do this per element.
+  bool hasCriticalEdges(unsigned Elt, AttributeMap &Attrs);
+  /// Dump the contents of the attributes.
+  // FIXME: ShrinkWrap2: Don't do this per element.
+  void dumpAttributes(unsigned Elt, const AttributeMap &Attrs) const;
+
+  /// * Verify if the results are better than obvious results, like:
+  ///   * CSR used in a single MBB: only one save and one restore.
+  /// * Remove empty entries from the Saves / Restores maps.
+  // FIXME: ShrinkWrap2: This shouldn't happen, we better fix the algorithm
+  // first.
+  void postProcessResults(const BBResultSetMap &OldUses);
+  /// Compute the shrink-wrapping cost, which is based on block frequency.
+  unsigned computeShrinkWrappingCost(MachineBlockFrequencyInfo *MBFI) const;
+  /// Compute the same cost, in entry / return blocks, which is based on block
+  /// frequency.
+  unsigned computeDefaultCost(MachineBlockFrequencyInfo *MBFI) const;
+  /// Verify save / restore points by walking the CFG.
+  /// This asserts if anything went wrong.
+  // FIXME: ShrinkWrap2: Should this be guarded by a macro?
+  void verifySavesRestores() const;
+
+  /// Dump the final shrink-wrapping results.
+  void dumpResults() const;
+
+public:
+  /// Run the shrink-wrapper on the function. If there are no uses, there will
+  /// be no saves / restores.
+  /// By default, run the shrink-wrapper with the target's CSRShrinkWrapInfo.
+  ShrinkWrapper(const MachineFunction &MF);
+  /// Run the shrink-wrapper with a custom ShrinkWrapInfo.
+  ShrinkWrapper(const MachineFunction &MF, std::unique_ptr<ShrinkWrapInfo> SWI);
+
+  /// Check if the function has any uses that can be shrink-wrapped.
+  bool hasUses() const { return !Uses.empty(); }
+
+  /// Get the target's shrink-wrap info.
+  ShrinkWrapInfo &getSWI() { return *SWI; };
+  const ShrinkWrapInfo &getSWI() const { return *SWI; };
+
+  /// Get the final results.
+  const SparseBBResultSetMap &getSaves() { return Saves; }
+  const SparseBBResultSetMap &getRestores() { return Restores; }
+
+  /// Emit optimization remarks for the whole function.
+  void emitRemarks(MachineOptimizationRemarkEmitter *ORE,
+                   MachineBlockFrequencyInfo *MBFI) const;
+
+  /// Check that the final results are better than the default behaviour.
+  bool areResultsInteresting(MachineBlockFrequencyInfo *MBFI) const;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_SHRINKWRAP_H
Index: include/llvm/Target/TargetFrameLowering.h
===================================================================
--- include/llvm/Target/TargetFrameLowering.h
+++ include/llvm/Target/TargetFrameLowering.h
@@ -15,6 +15,8 @@
 #define LLVM_TARGET_TARGETFRAMELOWERING_H
 
 #include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/ShrinkWrapper.h"
 #include <utility>
 #include <vector>
 
@@ -23,6 +25,7 @@
   class CalleeSavedInfo;
   class MachineFunction;
   class RegScavenger;
+  class ShrinkWrapInfo;
 
 /// Information about stack frame layout on the target.  It holds the direction
 /// of stack growth, the known stack alignment on entry to each function, and
@@ -326,6 +329,13 @@
     return true;
   }
 
+  // FIXME: ShrinkWrap2: Yet another target hook to be removed later. See
+  // comment in PrologEpilogInserter.cpp:579
+  virtual void
+  processValidCalleeSavedInfo(MachineFunction &MF,
+                              const TargetRegisterInfo *TRI,
+                              std::vector<CalleeSavedInfo> &CSI) const {}
+
   /// Check if given function is safe for not having callee saved registers.
   /// This is used when interprocedural register allocation is enabled.
   static bool isSafeForNoCSROpt(const Function *F) {
@@ -339,6 +349,13 @@
           return false;
     return true;
   }
+
+  /// Provide all the target-hooks needed for shrink-wrapping.
+  virtual std::unique_ptr<ShrinkWrapInfo>
+  createCSRShrinkWrapInfo(const MachineFunction &MF) const {
+    llvm_unreachable("Target didn't implement a ShrinkWrapInfo subclass!");
+    return nullptr;
+  }
 };
 
 } // End llvm namespace
Index: lib/CodeGen/AsmPrinter/AsmPrinter.cpp
===================================================================
--- lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -20,6 +20,7 @@
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallString.h"
@@ -899,8 +900,13 @@
   return true;
 }
 
-AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() {
-  if (MAI->getExceptionHandlingType() == ExceptionHandling::DwarfCFI &&
+AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() const {
+  ExceptionHandling ExceptionHandlingType = MAI->getExceptionHandlingType();
+  if (ExceptionHandlingType != ExceptionHandling::DwarfCFI &&
+      ExceptionHandlingType != ExceptionHandling::ARM)
+    return CFI_M_None;
+
+  if (ExceptionHandlingType == ExceptionHandling::DwarfCFI &&
       MF->getFunction()->needsUnwindTableEntry())
     return CFI_M_EH;
 
@@ -910,16 +916,135 @@
   return CFI_M_None;
 }
 
+void AsmPrinter::generateShrinkWrappingCFI() {
+  // Reset everything.
+  ExtraSaveCFI.clear();
+  ExtraRestoreCFI.clear();
+
+  // FIXME: ShrinkWrap2: Gather all the saving points (based on CFI).
+  CSRMap Saves;
+  // FIXME: ShrinkWrap2: Gather all the restoring points (based on CFI).
+  CSRMap Restores;
+
+  const MCRegisterInfo *MCRI = MF->getMMI().getContext().getRegisterInfo();
+  const MachineRegisterInfo &MRI = MF->getRegInfo();
+
+  // Collect all the CSRs and their index.
+  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
+  for (unsigned i = 0; CSRegs[i]; ++i) {
+    unsigned DwarfReg = MCRI->getDwarfRegNum(CSRegs[i], true);
+    unsigned Reg = MCRI->getLLVMRegNum(DwarfReg, false);
+    RegToCSRIdx[Reg] = i;
+  }
+
+  // First pass, collect .cfi_offset and .cfi_restore directives:
+  // * .cfi_offset represents a csr save
+  // * .cfi_restore represents a csr restore
+  for (const MachineBasicBlock &MBB : *MF) {
+    for (const MachineInstr &MI : MBB) {
+      if (!MI.isCFIInstruction())
+        continue;
+      const std::vector<MCCFIInstruction> &Instrs = MF->getFrameInstructions();
+      unsigned CFIIndex = MI.getOperand(0).getCFIIndex();
+      const MCCFIInstruction &CFI = Instrs[CFIIndex];
+
+      // Check if it's a save.
+      if (CFI.getOperation() == MCCFIInstruction::OpOffset) {
+        unsigned DwarfReg = CFI.getRegister();
+        unsigned Reg = MCRI->getLLVMRegNum(DwarfReg, false);
+        if (RegToCSRIdx.count(Reg)) {
+          BitVector &Save = Saves[MBB.getNumber()];
+          Save.resize(RegToCSRIdx.size());
+          Save.set(RegToCSRIdx[Reg]);
+        }
+      }
+
+      // Check if it's a restore.
+      if (CFI.getOperation() == MCCFIInstruction::OpRestore) {
+        unsigned DwarfReg = CFI.getRegister();
+        unsigned Reg = MCRI->getLLVMRegNum(DwarfReg, false);
+        if (RegToCSRIdx.count(Reg)) {
+          BitVector &Restore = Restores[MBB.getNumber()];
+          Restore.resize(RegToCSRIdx.size());
+          Restore.set(RegToCSRIdx[Reg]);
+        }
+      }
+    }
+  }
+
+  // Compute the "liveness" of the CSRs. A CSR is live if it has been saved,
+  // and killed if it has been restored.
+  SmallVector<BitVector, 8> LiveCSRs{MF->getNumBlockIDs()};
+  for (BitVector &BV : LiveCSRs)
+    BV.resize(RegToCSRIdx.size());
+
+  ReversePostOrderTraversal<const MachineFunction *> RPOT(MF);
+  for (const MachineBasicBlock *MBB : RPOT) {
+    BitVector &LiveHere = LiveCSRs[MBB->getNumber()];
+    // LIVE(MBB) += LIVE(EACH_PRED) - RESTORE(EACH_PRED) + SAVE(MBB)
+    // Propagate the liveness information.
+    for (const MachineBasicBlock *Pred : MBB->predecessors())
+      LiveHere |= LiveCSRs[Pred->getNumber()];
+    // If any of the predecessors restored any CSR, kill them.
+    for (const MachineBasicBlock *Pred : MBB->predecessors()) {
+      auto Found = Restores.find(Pred->getNumber());
+      if (Found == Restores.end())
+        continue;
+      BitVector &Killed = Found->second;
+      LiveHere.flip();
+      LiveHere |= Killed;
+      LiveHere.flip();
+    }
+    // If this block saved any CSRs, make them live.
+    auto Found = Saves.find(MBB->getNumber());
+    if (Found == Saves.end())
+      continue;
+    BitVector &Saved = Found->second;
+    LiveHere |= Saved;
+  }
+
+  // Now compute the state changes we need in between the blocks.
+  BitVector LastState(RegToCSRIdx.size());
+  for (const MachineBasicBlock &MBB : *MF) {
+    BitVector &LiveHere = LiveCSRs[MBB.getNumber()];
+    if (&MBB != &MF->front()) {
+      auto Prev = std::prev(MBB.getIterator());
+      auto Found = Restores.find(Prev->getNumber());
+      if (Found != Restores.end() && !Found->second.empty()) {
+        BitVector &Killed = Found->second;
+        LastState.flip();
+        LastState |= Killed;
+        LastState.flip();
+      }
+    }
+
+    // Save everything that is added in the current state and was not there in
+    // the last one (and the saves that are already here).
+    BitVector ToSave = LastState;
+    ToSave |= Saves[MBB.getNumber()];
+    ToSave.flip();
+    ToSave &= LiveHere;
+    if (ToSave.count())
+      ExtraSaveCFI[MBB.getNumber()] = std::move(ToSave);
+
+    // Restore everything that is not in the current state anymore but it was
+    // in the last one.
+    BitVector ToRestore = LastState;
+    ToRestore.flip();
+    ToRestore |= LiveHere;
+    ToRestore.flip();
+    if (ToRestore.count())
+      ExtraRestoreCFI[MBB.getNumber()] = std::move(ToRestore);
+
+    LastState = LiveHere;
+  }
+}
+
 bool AsmPrinter::needsSEHMoves() {
   return MAI->usesWindowsCFI() && MF->getFunction()->needsUnwindTableEntry();
 }
 
 void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) {
-  ExceptionHandling ExceptionHandlingType = MAI->getExceptionHandlingType();
-  if (ExceptionHandlingType != ExceptionHandling::DwarfCFI &&
-      ExceptionHandlingType != ExceptionHandling::ARM)
-    return;
-
   if (needsCFIMoves() == CFI_M_None)
     return;
 
@@ -1429,6 +1554,14 @@
   EnablePrintSchedInfo = PrintSchedule.getNumOccurrences()
                              ? PrintSchedule
                              : STI.supportPrintSchedInfo();
+
+  if (needsCFIMoves() == CFI_M_None)
+    return;
+
+  // FIXME: ShrinkWrap2: Compute the blocks that need CFI state switching.
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  if (MFI.getShouldUseShrinkWrap2())
+    generateShrinkWrappingCFI();
 }
 
 namespace {
@@ -2659,6 +2792,42 @@
   } else {
     OutStreamer->EmitLabel(MBB.getSymbol());
   }
+
+  // FIXME: ShrinkWrap2: Insert the CFI that are needed to do the transition
+  // between each block.
+  if (needsCFIMoves() == CFI_M_None)
+    return;
+
+  DenseMap<unsigned, unsigned> CSRIdxToCSIIdx;
+  const MCRegisterInfo *MCRI = MF->getMMI().getContext().getRegisterInfo();
+  const MachineFrameInfo &MFI = MF->getFrameInfo();
+  const std::vector<CalleeSavedInfo> &CSIs = MFI.getCalleeSavedInfo();
+  for (auto &KV : enumerate(CSIs)) {
+    const CalleeSavedInfo &CSI = KV.value();
+    unsigned Reg = CSI.getReg();
+    unsigned DwarfReg = MCRI->getDwarfRegNum(Reg, true);
+    Reg = MCRI->getLLVMRegNum(DwarfReg, false);
+    unsigned CSIIdx = KV.index();
+    CSRIdxToCSIIdx[RegToCSRIdx.lookup(Reg)] = CSIIdx;
+  }
+
+  if (MFI.getShouldUseShrinkWrap2()) {
+    const MCRegisterInfo *MRI = MF->getMMI().getContext().getRegisterInfo();
+    for (unsigned CSRIdx : ExtraSaveCFI.lookup(MBB.getNumber()).set_bits()) {
+      const CalleeSavedInfo &CSI = CSIs[CSRIdxToCSIIdx[CSRIdx]];
+      int64_t Offset = MFI.getObjectOffset(CSI.getFrameIdx());
+      unsigned DwarfReg = MRI->getDwarfRegNum(CSI.getReg(), true);
+      // .cfi_offset %reg, off
+      emitCFIInstruction(
+          MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
+    }
+    for (unsigned CSRIdx : ExtraRestoreCFI.lookup(MBB.getNumber()).set_bits()) {
+      const CalleeSavedInfo &CSI = CSIs[CSRIdxToCSIIdx[CSRIdx]];
+      unsigned DwarfReg = MRI->getDwarfRegNum(CSI.getReg(), true);
+      // .cfi_restore %reg
+      emitCFIInstruction(MCCFIInstruction::createRestore(nullptr, DwarfReg));
+    }
+  }
 }
 
 void AsmPrinter::EmitVisibility(MCSymbol *Sym, unsigned Visibility,
Index: lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
===================================================================
--- lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -1020,6 +1020,7 @@
   // non-frame setup location marks the beginning of the function body.
   // FIXME: is there a simpler a way to do this? Can we just search
   // for the first instruction of the function, not the last of the prolog?
+  // FIXME: ShrinkWrap2: This won't work with shrink-wrapping, I guess.
   DebugLoc PrologEndLoc;
   bool EmptyPrologue = true;
   for (const auto &MBB : *MF) {
Index: lib/CodeGen/CMakeLists.txt
===================================================================
--- lib/CodeGen/CMakeLists.txt
+++ lib/CodeGen/CMakeLists.txt
@@ -127,6 +127,8 @@
   ScoreboardHazardRecognizer.cpp
   ShadowStackGCLowering.cpp
   ShrinkWrap.cpp
+# FIXME: ShrinkWrap2: Merge.
+  ShrinkWrapper.cpp
   SjLjEHPrepare.cpp
   SlotIndexes.cpp
   SpillPlacement.cpp
Index: lib/CodeGen/PrologEpilogInserter.cpp
===================================================================
--- lib/CodeGen/PrologEpilogInserter.cpp
+++ lib/CodeGen/PrologEpilogInserter.cpp
@@ -20,15 +20,18 @@
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/ShrinkWrapper.h"
 #include "llvm/CodeGen/WinEHFuncInfo.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/InlineAsm.h"
@@ -41,18 +44,20 @@
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include <climits>
 
 using namespace llvm;
 
 #define DEBUG_TYPE "prologepilog"
 
+// FIXME: ShrinkWrap2: Fix name.
+cl::opt<cl::boolOrDefault>
+    EnableShrinkWrap2Opt("enable-shrink-wrap2", cl::Hidden,
+                         cl::desc("enable the shrink-wrapping 2 pass"));
+
 typedef SmallVector<MachineBasicBlock *, 4> MBBVector;
-static void doSpillCalleeSavedRegs(MachineFunction &MF, RegScavenger *RS,
-                                   unsigned &MinCSFrameIndex,
-                                   unsigned &MaxCXFrameIndex,
-                                   const MBBVector &SaveBlocks,
-                                   const MBBVector &RestoreBlocks);
 
 namespace {
 class PEI : public MachineFunctionPass {
@@ -64,6 +69,9 @@
 
   void getAnalysisUsage(AnalysisUsage &AU) const override;
 
+  /// \brief Check if shrink wrapping is enabled for this target and function.
+  bool isShrinkWrapEnabled(const MachineFunction &MF);
+
   MachineFunctionProperties getRequiredProperties() const override {
     MachineFunctionProperties MFP;
     if (UsesCalleeSaves)
@@ -77,16 +85,13 @@
   bool runOnMachineFunction(MachineFunction &Fn) override;
 
 private:
-  std::function<void(MachineFunction &MF, RegScavenger *RS,
-                     unsigned &MinCSFrameIndex, unsigned &MaxCSFrameIndex,
-                     const MBBVector &SaveBlocks,
-                     const MBBVector &RestoreBlocks)>
-      SpillCalleeSavedRegisters;
+  std::function<void(MachineFunction &MF)> SpillCalleeSavedRegisters;
   std::function<void(MachineFunction &MF, RegScavenger &RS)>
       ScavengeFrameVirtualRegs;
 
   bool UsesCalleeSaves = false;
 
+  // FIXME: ShrinkWrap2: Temporary hack. Remove.
   RegScavenger *RS;
 
   // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved
@@ -94,6 +99,7 @@
   unsigned MinCSFrameIndex = std::numeric_limits<unsigned>::max();
   unsigned MaxCSFrameIndex = 0;
 
+  // FIXME: ShrinkWrap2: Merge the shrink-wrapping logic here.
   // Save and Restore blocks of the current function. Typically there is a
   // single save block, unless Windows EH funclets are involved.
   MBBVector SaveBlocks;
@@ -108,6 +114,14 @@
   // FrameIndexVirtualScavenging is used.
   bool FrameIndexEliminationScavenging;
 
+  // Emit optimization remarks.
+  MachineOptimizationRemarkEmitter *ORE;
+
+  void doSpillCalleeSavedRegs(MachineFunction &MF);
+  void doSpillCalleeSavedRegsShrinkWrap2(MachineFunction &Fn,
+                                         CalleeSavedMap &Saves,
+                                         CalleeSavedMap &Restores);
+
   void calculateCallFrameInfo(MachineFunction &Fn);
   void calculateSaveRestoreBlocks(MachineFunction &Fn);
 
@@ -132,6 +146,8 @@
 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
 INITIALIZE_PASS_DEPENDENCY(StackProtector)
+INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
 INITIALIZE_PASS_END(PEI, DEBUG_TYPE,
                     "Prologue/Epilogue Insertion & Frame Finalization", false,
                     false)
@@ -148,9 +164,57 @@
   AU.addPreserved<MachineLoopInfo>();
   AU.addPreserved<MachineDominatorTree>();
   AU.addRequired<StackProtector>();
+  AU.addRequired<MachineOptimizationRemarkEmitterPass>();
+  AU.addRequired<MachineBlockFrequencyInfo>();
   MachineFunctionPass::getAnalysisUsage(AU);
 }
 
+bool PEI::isShrinkWrapEnabled(const MachineFunction &MF) {
+  auto BecauseOf = [&](const char *Title, const char *Msg, DebugLoc Loc = {}) {
+    MachineOptimizationRemarkMissed R(DEBUG_TYPE, Title, Loc, &MF.front());
+    R << "Couldn't shrink-wrap this function because " << Msg;
+    ORE->emit(R);
+    return false;
+  };
+
+  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+
+  switch (EnableShrinkWrap2Opt) {
+  case cl::BOU_UNSET: {
+    if (MF.getTarget().getOptLevel() == CodeGenOpt::None)
+      return BecauseOf("ShrinkWrapDisabledOpt",
+                       "shrink-wrapping is enabled at O1+.");
+
+    if (!TFI->enableShrinkWrapping(MF))
+      return BecauseOf("ShrinkWrapDisabledTarget",
+                       "shrink-wrapping is not enabled on this target.");
+    // Windows with CFI has some limitations that make it impossible
+    // to use shrink-wrapping.
+    if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
+      return BecauseOf("ShrinkWrapDisabledWindowsCFI",
+                       "shrink-wrapping does not support Windows CFI yet.");
+
+    // Sanitizers look at the value of the stack at the location
+    // of the crash. Since a crash can happen anywhere, the
+    // frame must be lowered before anything else happen for the
+    // sanitizers to be able to get a correct stack frame.
+    if (MF.getFunction()->hasFnAttribute(Attribute::SanitizeAddress))
+      return BecauseOf("ShrinkWrapDisabledASAN",
+                       "shrink-wrapping can't be enabled with ASAN.");
+    if (MF.getFunction()->hasFnAttribute(Attribute::SanitizeThread))
+      return BecauseOf("ShrinkWrapDisabledTSAN",
+                       "shrink-wrapping can't be enabled with TSAN.");
+    if (MF.getFunction()->hasFnAttribute(Attribute::SanitizeMemory))
+      return BecauseOf("ShrinkWrapDisabledMSAN",
+                       "shrink-wrapping can't be enabled with MSAN.");
+  }
+  case cl::BOU_TRUE:
+    return true;
+  case cl::BOU_FALSE:
+    return false;
+  }
+  llvm_unreachable("Invalid shrink-wrapping state");
+}
 
 /// StackObjSet - A set of stack object indexes
 typedef SmallSetVector<int, 8> StackObjSet;
@@ -162,12 +226,12 @@
   if (!SpillCalleeSavedRegisters) {
     const TargetMachine &TM = Fn.getTarget();
     if (!TM.usesPhysRegsForPEI()) {
-      SpillCalleeSavedRegisters = [](MachineFunction &, RegScavenger *,
-                                     unsigned &, unsigned &, const MBBVector &,
-                                     const MBBVector &) {};
+      SpillCalleeSavedRegisters = [](MachineFunction &) {};
       ScavengeFrameVirtualRegs = [](MachineFunction &, RegScavenger &) {};
     } else {
-      SpillCalleeSavedRegisters = doSpillCalleeSavedRegs;
+      SpillCalleeSavedRegisters = [this](MachineFunction &MF) {
+        return this->doSpillCalleeSavedRegs(MF);
+      };
       ScavengeFrameVirtualRegs = scavengeFrameVirtualRegs;
       UsesCalleeSaves = true;
     }
@@ -177,10 +241,14 @@
   const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
   const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
 
+  MachineFrameInfo &MFI = Fn.getFrameInfo();
   RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : nullptr;
+  // FIXME: ShrinkWrap2: Temporary hack. Remove.
+  MFI.RS = RS;
   FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn);
   FrameIndexEliminationScavenging = (RS && !FrameIndexVirtualScavenging) ||
     TRI->requiresFrameIndexReplacementScavenging(Fn);
+  ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
 
   // Calculate the MaxCallFrameSize and AdjustsStack variables for the
   // function's frame information. Also eliminates call frame pseudo
@@ -192,8 +260,7 @@
   calculateSaveRestoreBlocks(Fn);
 
   // Handle CSR spilling and restoring, for targets that need it.
-  SpillCalleeSavedRegisters(Fn, RS, MinCSFrameIndex, MaxCSFrameIndex,
-                            SaveBlocks, RestoreBlocks);
+  SpillCalleeSavedRegisters(Fn);
 
   // Allow the target machine to make final modifications to the function
   // before the frame layout is finalized.
@@ -226,13 +293,14 @@
   }
 
   // Warn on stack size when we exceeds the given limit.
-  MachineFrameInfo &MFI = Fn.getFrameInfo();
   uint64_t StackSize = MFI.getStackSize();
   if (WarnStackSize.getNumOccurrences() > 0 && WarnStackSize < StackSize) {
     DiagnosticInfoStackSize DiagStackSize(*F, StackSize);
     F->getContext().diagnose(DiagStackSize);
   }
 
+  // FIXME: ShrinkWrap2: Temporary hack. Remove.
+  MFI.RS = nullptr;
   delete RS;
   SaveBlocks.clear();
   RestoreBlocks.clear();
@@ -306,6 +374,8 @@
 
   // Use the points found by shrink-wrapping, if any.
   if (MFI.getSavePoint()) {
+    // FIXME: ShrinkWrap2: Remove check.
+    assert(!MFI.getShouldUseShrinkWrap2() && "Mixing shrink-wrapping passes.");
     SaveBlocks.push_back(MFI.getSavePoint());
     assert(MFI.getRestorePoint() && "Both restore and save must be set");
     MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
@@ -327,6 +397,95 @@
   }
 }
 
+/// Insert code that saves the callee saved registers used in the basic block.
+static void insertCSRSaves(MachineBasicBlock &SaveBB,
+                           ArrayRef<CalleeSavedInfo> CSIs) {
+  MachineFunction &Fn = *SaveBB.getParent();
+  const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
+  const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
+  const TargetRegisterInfo &TRI = *Fn.getSubtarget().getRegisterInfo();
+
+  assert(!CSIs.empty() && "No saves to insert.");
+
+  MachineBasicBlock::iterator I = SaveBB.begin();
+  if (!TFI.spillCalleeSavedRegisters(SaveBB, I, CSIs, &TRI)) {
+    for (const CalleeSavedInfo &CSI : CSIs) {
+      unsigned Reg = CSI.getReg();
+
+      // Update liveness.
+      if (!Fn.getRegInfo().isLiveIn(Reg))
+        SaveBB.addLiveIn(Reg);
+
+      // Insert the spill to the stack frame.
+      // FIXME: ShrinkWrap2: Check if can be killed.
+      const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg);
+      TII.storeRegToStackSlot(SaveBB, I, Reg, false, CSI.getFrameIdx(), RC,
+                              &TRI);
+      std::prev(I)->setFlag(MachineInstr::FrameSetup);
+
+      // FIXME: ShrinkWrap2: Check wether we need CFI, even though it is
+      // ignored by the AsmPrinter.
+      // Emit CFI for every CSR spill:
+      // .cfi_offset %reg, off
+      MachineFrameInfo &MFI = Fn.getFrameInfo();
+      if (MFI.getShouldUseShrinkWrap2()) {
+        unsigned Offset = MFI.getObjectOffset(CSI.getFrameIdx());
+        const MCRegisterInfo *MRI = Fn.getMMI().getContext().getRegisterInfo();
+        unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
+        unsigned CFIIndex = Fn.addFrameInst(
+            MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
+        BuildMI(SaveBB, I, {}, TII.get(TargetOpcode::CFI_INSTRUCTION))
+            .addCFIIndex(CFIIndex);
+      }
+    }
+  }
+}
+
+/// Insert code that restores the callee saved registers used in the basic
+/// block.
+static void insertCSRRestores(MachineBasicBlock &RestoreBB,
+                              ArrayRef<CalleeSavedInfo> CSIs) {
+  MachineFunction &Fn = *RestoreBB.getParent();
+  const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
+  const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
+  const TargetRegisterInfo &TRI = *Fn.getSubtarget().getRegisterInfo();
+
+  assert(!CSIs.empty() && "No restores to insert.");
+
+  // Restore using target interface.
+  MachineBasicBlock::iterator I = RestoreBB.getFirstTerminator();
+
+  // Restore all registers immediately before the return and any terminators
+  // that precede it.
+  if (!TFI.restoreCalleeSavedRegisters(RestoreBB, I, CSIs, &TRI)) {
+    for (int i = CSIs.size() - 1; i >= 0; --i) {
+      unsigned Reg = CSIs[i].getReg();
+      const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg);
+      TII.loadRegFromStackSlot(RestoreBB, I, Reg, CSIs[i].getFrameIdx(), RC,
+                               &TRI);
+      std::prev(I)->setFlag(MachineInstr::FrameDestroy);
+
+      assert(I != RestoreBB.begin() &&
+             "loadRegFromStackSlot didn't insert any code!");
+
+      // FIXME: ShrinkWrap2: Check wether we need CFI, even though it is
+      // ignored by the AsmPrinter.
+      // Emit CFI for every CSR restore.
+      // .cfi_restore %reg
+      MachineFrameInfo &MFI = Fn.getFrameInfo();
+      if (MFI.getShouldUseShrinkWrap2()) {
+        MachineModuleInfo &MMI = Fn.getMMI();
+        const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
+        unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
+        unsigned CFIIndex =
+            Fn.addFrameInst(MCCFIInstruction::createRestore(nullptr, DwarfReg));
+        BuildMI(RestoreBB, I, {}, TII.get(TargetOpcode::CFI_INSTRUCTION))
+            .addCFIIndex(CFIIndex);
+      }
+    }
+  }
+}
+
 static void assignCalleeSavedSpillSlots(MachineFunction &F,
                                         const BitVector &SavedRegs,
                                         unsigned &MinCSFrameIndex,
@@ -398,6 +557,13 @@
   }
 
   MFI.setCalleeSavedInfo(CSI);
+  // FIXME: ShrinkWrap2: AArch64FrameLowering needs to call
+  // computeCaleeSaveRegisterPairs *after* calling the generic code above. We
+  // could duplicate this code inside
+  // AArch64FrameLowering::assignCalleeSavedSpillSlots, but we need to update
+  // MinCSFrameIndex and MaxCSFrameIndex.
+  if (MFI.getShouldUseShrinkWrap2())
+    TFI->processValidCalleeSavedInfo(F, RegInfo, CSI);
 }
 
 /// Helper function to update the liveness information for the callee-saved
@@ -475,74 +641,133 @@
   if (CSI.empty())
     return;
 
-  const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
-  const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
-  const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
-  MachineBasicBlock::iterator I;
-
   // Spill using target interface.
   for (MachineBasicBlock *SaveBlock : SaveBlocks) {
-    I = SaveBlock->begin();
-    if (!TFI->spillCalleeSavedRegisters(*SaveBlock, I, CSI, TRI)) {
-      for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
-        // Insert the spill to the stack frame.
-        unsigned Reg = CSI[i].getReg();
-        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-        TII.storeRegToStackSlot(*SaveBlock, I, Reg, true, CSI[i].getFrameIdx(),
-                                RC, TRI);
-      }
-    }
+    insertCSRSaves(*SaveBlock, CSI);
     // Update the live-in information of all the blocks up to the save point.
     updateLiveness(Fn);
   }
 
   // Restore using target interface.
-  for (MachineBasicBlock *MBB : RestoreBlocks) {
-    I = MBB->end();
-
-    // Skip over all terminator instructions, which are part of the return
-    // sequence.
-    MachineBasicBlock::iterator I2 = I;
-    while (I2 != MBB->begin() && (--I2)->isTerminator())
-      I = I2;
-
-    bool AtStart = I == MBB->begin();
-    MachineBasicBlock::iterator BeforeI = I;
-    if (!AtStart)
-      --BeforeI;
-
-    // Restore all registers immediately before the return and any
-    // terminators that precede it.
-    if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) {
-      for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
-        unsigned Reg = CSI[i].getReg();
-        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-        TII.loadRegFromStackSlot(*MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
-        assert(I != MBB->begin() &&
-               "loadRegFromStackSlot didn't insert any code!");
-        // Insert in reverse order.  loadRegFromStackSlot can insert
-        // multiple instructions.
-        if (AtStart)
-          I = MBB->begin();
-        else {
-          I = BeforeI;
-          ++I;
-        }
+  for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
+    insertCSRRestores(*RestoreBlock, CSI);
+}
+
+// FIXME: ShrinkWrap2: Name.
+void PEI::doSpillCalleeSavedRegsShrinkWrap2(MachineFunction &Fn,
+                                            CalleeSavedMap &Saves,
+                                            CalleeSavedMap &Restores) {
+  const TargetRegisterInfo &TRI = *Fn.getSubtarget().getRegisterInfo();
+  MachineFrameInfo &MFI = Fn.getFrameInfo();
+
+  // Now gather the callee-saved registers we found using shrink-wrapping.
+  // FIXME: ShrinkWrap2: We already gathered all the CSRs in ShrinkWrap. Reuse
+  // somehow?
+  BitVector ShrinkWrapSavedRegs(TRI.getNumRegs());
+  for (auto &Save : Saves)
+    for (const CalleeSavedInfo &CSI : Save.second)
+      ShrinkWrapSavedRegs.set(CSI.getReg());
+
+  // FIXME: ShrinkWrap2: Re-use stack slots.
+  assignCalleeSavedSpillSlots(Fn, ShrinkWrapSavedRegs, MinCSFrameIndex,
+                              MaxCSFrameIndex);
+
+  MFI.setCalleeSavedInfoValid(true);
+
+  if (Fn.getFunction()->hasFnAttribute(Attribute::Naked))
+    return;
+
+  // FIXME: ShrinkWrap2: This is awful. We first call
+  // assignCalleeSavedSpillSlots, that fills MFI.CalleeSavedInfo which is used
+  // for the ENTIRE function. Then, we need to reassign the FrameIdx back to the
+  // Saves / Restores map.
+  SmallVector<std::pair<std::vector<CalleeSavedInfo> *, unsigned>, 2> ToRemove;
+  const std::vector<CalleeSavedInfo> &CSIs = MFI.getCalleeSavedInfo();
+  for (auto *Map : {&Saves, &Restores}) {
+    for (auto &Elt : *Map) {
+      for (const CalleeSavedInfo &CSI : Elt.second) {
+        unsigned Reg = CSI.getReg();
+        // Look for the register in the assigned CSIs, and reassign it in the
+        // map.
+        auto It = find_if(CSIs, [&](const CalleeSavedInfo &NewCSI) {
+          return NewCSI.getReg() == Reg;
+        });
+        if (It != CSIs.end())
+          // FIXME: ShrinkWrap2: const_cast...
+          const_cast<CalleeSavedInfo &>(CSI).setFrameIdx(It->getFrameIdx());
+        else // Also, if we can't find it in the list, it means the target
+             // removed it. x86 does this for FP, since the spill is part of the
+             // prologue emission.
+          ToRemove.emplace_back(&Elt.second, Reg);
       }
     }
   }
+  for (auto& Pair : ToRemove) {
+    std::vector<CalleeSavedInfo> &V = *Pair.first;
+    unsigned Reg = Pair.second;
+    V.erase(std::remove_if(V.begin(), V.end(),
+                           [&](const CalleeSavedInfo &CSI) {
+                             return CSI.getReg() == Reg;
+                           }),
+            V.end());
+  }
+
+  for (auto &Save : Saves) {
+    insertCSRSaves(*Save.first, Save.second);
+    // FIXME: ShrinkWrap2: Update liveness only after all spills / restores?
+    updateLiveness(Fn);
+  }
+
+  for (auto &Restore : Restores)
+    insertCSRRestores(*Restore.first, Restore.second);
 }
 
-static void doSpillCalleeSavedRegs(MachineFunction &Fn, RegScavenger *RS,
-                                   unsigned &MinCSFrameIndex,
-                                   unsigned &MaxCSFrameIndex,
-                                   const MBBVector &SaveBlocks,
-                                   const MBBVector &RestoreBlocks) {
+void PEI::doSpillCalleeSavedRegs(MachineFunction &Fn) {
   const Function *F = Fn.getFunction();
   const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
+  MachineFrameInfo &MFI = Fn.getFrameInfo();
+
   MinCSFrameIndex = std::numeric_limits<unsigned>::max();
   MaxCSFrameIndex = 0;
 
+
+  /// If any, contains better save points for the prologue found by
+  /// shrink-wrapping.
+  CalleeSavedMap Saves;
+  /// If any, contains better restore points for the epilogue found by
+  /// shrink-wrapping.
+  CalleeSavedMap Restores;
+
+  if (!Fn.empty() && isShrinkWrapEnabled(Fn)) {
+    ShrinkWrapper SW(Fn);
+    auto *MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+    if (SW.areResultsInteresting(MBFI)) {
+      MachineFrameInfo &MFI = Fn.getFrameInfo();
+      MFI.setShouldUseShrinkWrap2(true);
+      SW.emitRemarks(ORE, MBFI);
+    }
+    auto &SWSaves = SW.getSaves();
+    auto &SWRestores = SW.getRestores();
+    const MCPhysReg *CSRegs = Fn.getRegInfo().getCalleeSavedRegs();
+    auto Transform = [&](const DenseMap<unsigned, BitVector> &Src,
+                         CalleeSavedMap &Dst) {
+      for (auto &KV : Src) {
+        MachineBasicBlock *MBB = Fn.getBlockNumbered(KV.first);
+        const BitVector &Regs = KV.second;
+        std::vector<CalleeSavedInfo> &CSI = Dst[MBB];
+
+        for (unsigned RegIdx : Regs.set_bits())
+          CSI.emplace_back(CSRegs[RegIdx]);
+      }
+    };
+    Transform(SWSaves, Saves);
+    Transform(SWRestores, Restores);
+  }
+
+  // FIXME: ShrinkWrap2: Share code somehow.
+  if (MFI.getShouldUseShrinkWrap2())
+    return doSpillCalleeSavedRegsShrinkWrap2(Fn, Saves, Restores);
+
   // Determine which of the registers in the callee save list should be saved.
   BitVector SavedRegs;
   TFI->determineCalleeSaves(Fn, SavedRegs, RS);
@@ -977,6 +1202,11 @@
 void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
   const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
 
+  // FIXME: ShrinkWrap2: Stack alginment / adjustment / etc. go in emitPrologue.
+  // For now, we add these at the entry / exit of the function, and we spill
+  // callee saves using our own blocks. There should be a way to shrink-wrap the
+  // stack operations as well.
+
   // Add prologue to the function...
   for (MachineBasicBlock *SaveBlock : SaveBlocks)
     TFI.emitPrologue(Fn, *SaveBlock);
@@ -985,9 +1215,11 @@
   for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
     TFI.emitEpilogue(Fn, *RestoreBlock);
 
+  // FIXME: ShrinkWrap2: Will this still work?
   for (MachineBasicBlock *SaveBlock : SaveBlocks)
     TFI.inlineStackProbe(Fn, *SaveBlock);
 
+  // FIXME: ShrinkWrap2: Will this still work?
   // Emit additional code that is required to support segmented stacks, if
   // we've been asked for it.  This, when linked with a runtime with support
   // for segmented stacks (libgcc is one), will result in allocating stack
@@ -997,6 +1229,7 @@
       TFI.adjustForSegmentedStacks(Fn, *SaveBlock);
   }
 
+  // FIXME: ShrinkWrap2: Will this still work?
   // Emit additional code that is required to explicitly handle the stack in
   // HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The
   // approach is rather similar to that of Segmented Stacks, but it uses a
Index: lib/CodeGen/ShrinkWrapper.cpp
===================================================================
--- /dev/null
+++ lib/CodeGen/ShrinkWrapper.cpp
@@ -0,0 +1,891 @@
+//===- lib/CodeGen/ShrinkWrapper.cpp - Shrink Wrapping Utility --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Shrink-wrapper implementation.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+
+#include "llvm/CodeGen/ShrinkWrapper.h"
+
+// FIXME: ShrinkWrap2: Name
+#define DEBUG_TYPE "shrink-wrap2"
+
+#define VERBOSE_DEBUG(X)                                                       \
+  do {                                                                         \
+    if (VerboseDebug)                                                          \
+      DEBUG(X);                                                                \
+  } while (0);
+
+using namespace llvm;
+
+// FIXME: ShrinkWrap2: Remove ?
+static cl::opt<cl::boolOrDefault>
+    VerboseDebug("shrink-wrap-verbose", cl::Hidden,
+                 cl::desc("verbose debug output"));
+
+// FIXME: ShrinkWrap2: Remove, debug.
+static cl::opt<cl::boolOrDefault> ViewCFGDebug("shrink-wrap-view", cl::Hidden,
+                                               cl::desc("view cfg"));
+
+void ShrinkWrapInfo::determineCSRUses() {
+  const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+  // Walk all the uses of each callee-saved register, and map them to their
+  // basic blocks.
+  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
+
+  BitVector CSRegUnits(TRI.getNumRegUnits());
+  DenseMap<unsigned, unsigned> RegUnitToCSRIdx;
+  for (unsigned i = 0; CSRegs[i]; ++i) {
+    for (MCRegUnitIterator RegUnit(CSRegs[i], &TRI); RegUnit.isValid();
+         ++RegUnit) {
+      RegUnitToCSRIdx[*RegUnit] = i;
+      CSRegUnits.set(*RegUnit);
+    }
+  }
+
+  auto MarkAsUsedBase = [&](unsigned RegIdx, unsigned MBBNum) {
+
+    BitVector &Used = Uses[MBBNum];
+    if (Used.empty())
+      Used.resize(getNumResultBits());
+    Used.set(RegIdx);
+  };
+  auto MarkAsUsed = [&](unsigned RegIdx, const MachineBasicBlock &MBB,
+                        bool isTerminator = false) {
+    unsigned MBBNum = MBB.getNumber();
+    MarkAsUsedBase(RegIdx, MBBNum);
+    // If it's a terminator, mark the successors as used as well,
+    // since we can't save after a terminator (i.e. cbz w23, #10).
+    if (isTerminator)
+      for (MachineBasicBlock *Succ : MBB.successors())
+        MarkAsUsedBase(RegIdx, Succ->getNumber());
+  };
+
+  // FIXME: ShrinkWrap2: Naked functions.
+  // FIXME: ShrinkWrap2: __builtin_unwind_init.
+
+  for (const MachineBasicBlock &MBB : MF) {
+    for (const MachineInstr &MI : MBB) {
+      for (const MachineOperand &MO : MI.operands()) {
+
+        if (MO.isRegMask()) {
+          // Check for regmasks only on the original CSR, as the aliases are not
+          // always there.
+          for (unsigned i = 0; CSRegs[i]; ++i)
+            if (MO.clobbersPhysReg(CSRegs[i]))
+              MarkAsUsed(i, MBB, MI.isTerminator());
+        } else if (MO.isReg() && MO.getReg() && (MO.readsReg() || MO.isDef())) {
+          for (MCRegUnitIterator RegUnit(MO.getReg(), &TRI); RegUnit.isValid();
+               ++RegUnit)
+            if (CSRegUnits.test(*RegUnit))
+              MarkAsUsed(RegUnitToCSRIdx[*RegUnit], MBB, MI.isTerminator());
+        }
+      }
+    }
+  }
+}
+
+const BitVector *ShrinkWrapInfo::getUses(unsigned MBBNum) const {
+  auto& Use = Uses[MBBNum];
+  if (Use.empty())
+    return nullptr;
+  return &Use;
+}
+
+ShrinkWrapper::SCCLoopInfo::SCCLoopInfo(const MachineFunction &MF) {
+  // Create the SCCLoops.
+  for (auto I = scc_begin(&MF); !I.isAtEnd(); ++I) {
+    // Skip non-loop SCCs.
+    if (!I.hasLoop())
+      continue;
+
+    SCCs.emplace_back();
+    // The SCCLoop number is the first basic block number in the SCC.
+    unsigned Number = (*I->begin())->getNumber();
+    SCCs.back().Number = Number;
+    SCCs.back().Size = I->size();
+
+    // The number used in MBBToSCC is the position of the SCC in `SCCs`
+    for (const MachineBasicBlock *MBB : *I)
+      MBBToSCC[MBB->getNumber()] = SCCs.size() - 1;
+  }
+
+  // Compute successors / predecessors of the SCCLoops.
+  for (const MachineBasicBlock &MBB : MF) {
+    for (const MachineBasicBlock *Succ : MBB.successors()) {
+      SCCLoop *MBBSCC = getSCCLoopFor(MBB.getNumber());
+      SCCLoop *SuccSCC = getSCCLoopFor(Succ->getNumber());
+      // The successor is a loop, but not the current block. It means the
+      // successor's predecessor is the current block.
+      if (!MBBSCC && SuccSCC)
+        SuccSCC->Predecessors.insert(&MBB);
+      // The successor is not a loop, but the current block is one. It means
+      // that the loop's successor is the block's successor.
+      else if (MBBSCC && !SuccSCC)
+        MBBSCC->Successors.insert(Succ);
+      // The successor and the block are loops. We now need to connect SCCs
+      // together.
+      else if (MBBSCC && SuccSCC && MBBSCC != SuccSCC) {
+        MBBSCC->Successors.insert(Succ);
+        SuccSCC->Predecessors.insert(&MBB);
+      }
+    }
+    for (const MachineBasicBlock *Pred : MBB.predecessors()) {
+      SCCLoop *MBBSCC = getSCCLoopFor(MBB.getNumber());
+      SCCLoop *PredSCC = getSCCLoopFor(Pred->getNumber());
+      // The predecessor is a loop, but not the current block. It means the
+      // predecessor's successor is the current block.
+      if (!MBBSCC && PredSCC)
+        PredSCC->Successors.insert(&MBB);
+      // The predecessor is not a loop, but the current block is one. It
+      // means that the loop's predecessor is the block's predecessor.
+      else if (MBBSCC && !PredSCC)
+        MBBSCC->Predecessors.insert(Pred);
+      // The successor and the block are loops. We now need to connect SCCs
+      // together.
+      else if (MBBSCC && PredSCC && MBBSCC != PredSCC) {
+        MBBSCC->Predecessors.insert(Pred);
+        PredSCC->Successors.insert(&MBB);
+      }
+    }
+  }
+}
+
+unsigned ShrinkWrapper::blockNumber(unsigned MBBNum) const {
+  if (const SCCLoop *C = SI.getSCCLoopFor(MBBNum))
+    return C->getNumber();
+  return MBBNum;
+}
+
+iterator_range<MBBIterator>
+ShrinkWrapper::blockSuccessors(unsigned MBBNum) const {
+  if (const SCCLoop *C = SI.getSCCLoopFor(MBBNum))
+    return {C->Successors.begin(), C->Successors.end()};
+  const MachineBasicBlock *MBB = MF.getBlockNumbered(MBBNum);
+  return {&*MBB->succ_begin(), &*MBB->succ_end()};
+}
+
+iterator_range<MBBIterator>
+ShrinkWrapper::blockPredecessors(unsigned MBBNum) const {
+  if (const SCCLoop *C = SI.getSCCLoopFor(MBBNum))
+    return {C->Predecessors.begin(), C->Predecessors.end()};
+  const MachineBasicBlock *MBB = MF.getBlockNumbered(MBBNum);
+  return {&*MBB->pred_begin(), &*MBB->pred_end()};
+}
+
+void ShrinkWrapper::determineUses() {
+  // FIXME: ShrinkWrap2: We do unnecessary copies here.
+  for (const MachineBasicBlock &MBB : MF) {
+    if (const TargetResultSet *Use = SWI->getUses(MBB.getNumber())) {
+      unsigned MBBNum = blockNumber(MBB.getNumber());
+      Uses[MBBNum].resize(SWI->getNumResultBits());
+      Uses[MBBNum] |= *Use;
+    }
+  }
+}
+
+void ShrinkWrapper::removeUsesOnNoReturnPaths() {
+  NoReturnBlocks.resize(MF.getNumBlockIDs());
+
+  // Mark all reachable blocks from any return blocks.
+  for (const MachineBasicBlock &MBB : MF)
+    if (MBB.isReturnBlock())
+      for (const MachineBasicBlock *Block : inverse_depth_first(&MBB))
+        NoReturnBlocks.set(Block->getNumber());
+
+  // Flip, so that we can get the non-reachable blocks.
+  NoReturnBlocks.flip();
+
+  for (unsigned MBBNum : NoReturnBlocks.set_bits()) {
+    DEBUG(dbgs() << "Remove uses from no-return BB#" << MBBNum << '\n');
+    Uses[MBBNum].clear();
+  }
+}
+
+void ShrinkWrapper::dumpUses() const {
+  for (const auto& Use : enumerate(Uses)) {
+    if (!Use.value().count())
+      continue;
+
+    dbgs() << "BB#" << Use.index() << " uses : ";
+    int Elt = Use.value().find_first();
+    if (Elt >= 0)
+      SWI->printElt(Elt, dbgs());
+    for (Elt = Use.value().find_next(Elt); Elt > 0;
+         Elt = Use.value().find_next(Elt)) {
+      dbgs() << ", ";
+      SWI->printElt(Elt, dbgs());
+    }
+    dbgs() << '\n';
+  }
+}
+
+void ShrinkWrapper::markUsesOutsideLoops() {
+  // Keep track of the elements to attach to a basic block.
+  SparseBBResultSetMap ToInsert;
+  for (const auto &Use : enumerate(Uses)) {
+    unsigned MBBNum = Use.index();
+    const TargetResultSet &Elts = Use.value();
+
+    auto Mark = [&](const MachineBasicBlock *Block) {
+      unsigned BlockNum = Block->getNumber();
+      TargetResultSet &ToInsertTo = ToInsert[BlockNum];
+      if (ToInsertTo.empty())
+        ToInsertTo.resize(SWI->getNumResultBits());
+      ToInsertTo |= Elts;
+      VERBOSE_DEBUG(dbgs() << "Mark: BB#" << BlockNum << '\n');
+    };
+
+    if (const SCCLoop *C = SI.getSCCLoopFor(MBBNum)) {
+      DEBUG(dbgs() << "Loop for CSR: BB#" << MBBNum << '\n');
+
+      // Mark all the entry blocks of the loop.
+      for (const MachineBasicBlock *Block : C->predecessors())
+        Mark(Block);
+
+      // Mark all the exit blocks of the loop.
+      for (const MachineBasicBlock *Exit : C->successors())
+        Mark(Exit);
+    }
+  }
+
+  for (auto &KV : ToInsert)
+    Uses[blockNumber(KV.first)] |= KV.second;
+}
+
+void ShrinkWrapper::computeAttributes(
+    unsigned Elt, AttributeMap &Attrs,
+    ReversePostOrderTraversal<const MachineFunction *> &RPOT) const {
+  auto UsesElt = [&](unsigned MBBNum) {
+    auto &Use = Uses[MBBNum];
+    if (Use.empty())
+      return false;
+    return Use.test(Elt);
+  };
+
+  auto Assign = [&](TargetResultSet &Set, bool New) {
+    if (Set.test(Elt) != New)
+      Set.flip(Elt);
+  };
+
+  // Count how many times we visited a SCCLoop.
+  DenseMap<const SCCLoop *, unsigned> SCCVisited;
+
+  // PO traversal for anticipation computation. We want to handle the SCC only
+  // when we reach the *LAST* component.
+  for (const MachineBasicBlock *MBB : make_range(RPOT.rbegin(), RPOT.rend())) {
+    unsigned MBBNum = MBB->getNumber();
+    if (const SCCLoop *C = SI.getSCCLoopFor(MBB->getNumber())) {
+      if (++SCCVisited[C] != C->getSize())
+        continue;
+      else
+        MBBNum = C->getNumber();
+    }
+
+    SWAttributes &Attr = Attrs[MBBNum];
+
+    // If the element is used in the block, or if it is anticipated in all
+    // successors it is also anticipated at the beginning, since we consider
+    // entire blocks.
+    //          -
+    // ANTIN = | APP || ANTOUT
+    //          -
+    TargetResultSet &ANTINb = Attr.ANTIN;
+    bool NewANTIN = UsesElt(MBBNum) || ANTOUT(Attrs, MBBNum, Elt);
+    Assign(ANTINb, NewANTIN);
+  }
+
+  // Reuse the map.
+  SCCVisited.clear();
+
+  // RPO traversal for availability computation. We want to handle the SCC only
+  // when we reach the *FIRST* component.
+  for (const MachineBasicBlock *MBB : RPOT) {
+    unsigned MBBNum = MBB->getNumber();
+    if (const SCCLoop *C = SI.getSCCLoopFor(MBB->getNumber())) {
+      if (++SCCVisited[C] != 1)
+        continue;
+      else
+        MBBNum = C->getNumber();
+    }
+
+    SWAttributes &Attr = Attrs[MBBNum];
+
+    // If the element is used in the block, or if it is always available in
+    // all predecessors , it is also available on exit, since we consider
+    // entire blocks.
+    //          -
+    // AVOUT = | APP || AVIN
+    //          -
+    TargetResultSet &AVOUTb = Attr.AVOUT;
+    bool NewAVOUT = UsesElt(MBBNum) || AVIN(Attrs, MBBNum, Elt);
+    Assign(AVOUTb, NewAVOUT);
+  }
+
+  VERBOSE_DEBUG(dumpAttributes(Elt, Attrs));
+}
+
+bool ShrinkWrapper::hasCriticalEdges(unsigned Elt, AttributeMap &Attrs) {
+  bool Needs = false;
+  for (const MachineBasicBlock &MBB : MF) {
+    bool IsSCCLoop = false;
+    if (const SCCLoop *C = SI.getSCCLoopFor(MBB.getNumber())) {
+      // Skip all the blocks that are not the number of the SCC, since all the
+      // attributes are based on that number.
+      if (static_cast<unsigned>(MBB.getNumber()) != C->getNumber())
+        continue;
+      else
+        IsSCCLoop = true;
+    }
+
+    unsigned MBBNum = blockNumber(MBB.getNumber());
+    // If the block is never returning, we won't bother saving / restoring.
+    if (NoReturnBlocks.test(MBBNum))
+      continue;
+
+    SWAttributes &Attr = Attrs[MBBNum];
+    // Check if this block is ANTIN and has an incoming critical edge where it
+    // is not ANTIN. If it's the case, mark it as used, and recompute.
+    if (Attr.ANTIN.test(Elt)) {
+      auto Preds = blockPredecessors(MBBNum);
+      // We're looking for more than 2 predecessors. Also, if it's a SCCLoop, it
+      // has a predecessor that is itself.
+      if (std::distance(Preds.begin(), Preds.end()) >= 2 || IsSCCLoop) {
+        for (const MachineBasicBlock *P : Preds) {
+          unsigned PredNum = blockNumber(P->getNumber());
+          SWAttributes &Attr = Attrs[PredNum];
+          TargetResultSet &ANTINp = Attr.ANTIN;
+          if (!ANTINp.test(Elt)) {
+            // FIXME: ShrinkWrap2: emit remark.
+            VERBOSE_DEBUG(dbgs()
+                          << "Incoming critical edge in " << MBBNum << ".\n");
+            // Mark it as used.
+            TargetResultSet &Used = Uses[PredNum];
+            if (Used.empty())
+              Used.resize(SWI->getNumResultBits());
+            Used.set(Elt);
+
+            // Also, mark it as ANTIN and AVOUT, since we're not calling
+            // populateAttributes anymore.
+            ANTINp.set(Elt);
+            Attr.AVOUT.set(Elt);
+            Needs = true;
+          }
+        }
+      }
+    }
+    // Check if this block is AVOUT and has an outgoing critical edge where it
+    // is not AVOUT. If it's the case, mark it as used, and recompute.
+    if (Attr.AVOUT.test(Elt)) {
+      auto Succs = blockSuccessors(MBBNum);
+      // We're looking for more than 2 successors. Also, if it's a SCCLoop, it
+      // has a predecessor that is itself.
+      if (std::distance(Succs.begin(), Succs.end()) >= 2 || IsSCCLoop) {
+        for (const MachineBasicBlock *S : Succs) {
+          unsigned SuccNum = blockNumber(S->getNumber());
+          SWAttributes &Attr = Attrs[SuccNum];
+          TargetResultSet &AVOUTs = Attr.AVOUT;
+          if (!AVOUTs.test(Elt)) {
+            // FIXME: ShrinkWrap2: emit remark.
+            VERBOSE_DEBUG(dbgs()
+                          << "Outgoing critical edge in " << MBBNum << ".\n");
+            // Mark it as used.
+            TargetResultSet &Used = Uses[SuccNum];
+            if (Used.empty())
+              Used.resize(SWI->getNumResultBits());
+            Used.set(Elt);
+
+            // Also, mark it as AVOUT and ANTIN, since we're not calling
+            // populateAttrbutes anymore.
+            AVOUTs.set(Elt);
+            Attr.ANTIN.set(Elt);
+            Needs = true;
+          }
+        }
+      }
+    }
+  }
+  // Recompute if needed.
+  return Needs;
+}
+
+void ShrinkWrapper::gatherAttributesResults(unsigned Elt, AttributeMap &Attrs) {
+  for (const MachineBasicBlock &MBB : MF) {
+    bool IsSCCLoop = false;
+    if (const SCCLoop *C = SI.getSCCLoopFor(MBB.getNumber())) {
+      // Skip all the blocks that are not the number of the SCC, since all the
+      // attributes are based on that number.
+      if (static_cast<unsigned>(MBB.getNumber()) != C->getNumber())
+        continue;
+      else
+        IsSCCLoop = true;
+    }
+
+    unsigned MBBNum = blockNumber(MBB.getNumber());
+    // If the block is never returning, we won't bother saving / restoring.
+    if (NoReturnBlocks.test(MBBNum))
+      continue;
+
+    SWAttributes &Attr = Attrs[MBBNum];
+
+    // If the uses are anticipated on *all* the paths leaving this block, and if
+    // it is not available at the entry of this block (if it is, then it means
+    // it has been saved already, but not restored), and if *none* of the
+    // predecessors anticipates this element on their output (we want to get the
+    // "highest" block), then we can identify a save point for the function.
+    //
+    // SAVE = ANTIN && !AVIN && !ANTIN(pred[i])
+    //
+    bool NS =
+        none_of(blockPredecessors(MBBNum), [&](const MachineBasicBlock *P) {
+          return Attrs[blockNumber(P->getNumber())].ANTIN.test(Elt);
+        });
+    if (NS && Attr.ANTIN.test(Elt) && !AVIN(Attrs, MBBNum, Elt)) {
+      TargetResultSet &Save = Saves[MBBNum];
+      if (Save.empty())
+        Save.resize(SWI->getNumResultBits());
+      Save.set(Elt);
+    }
+
+    // If the uses are available on *all* the paths leading to this block, and
+    // if the element is not anticipated at the exit of this block (if it is,
+    // then it means it has been restored already), and if *none* of the
+    // successors make the element available (we want to cover the // deepest //
+    // use), then we can identify a restrore point for the function.
+    //
+    // RESTORE = AVOUT && !ANTOUT && !AVOUT(succ[i])
+    //
+    bool NR = none_of(blockSuccessors(MBBNum), [&](const MachineBasicBlock *S) {
+      return Attrs[blockNumber(S->getNumber())].AVOUT.test(Elt);
+    });
+    if (NR && Attr.AVOUT.test(Elt) && !ANTOUT(Attrs, MBBNum, Elt)) {
+      TargetResultSet &Restore = Restores[MBBNum];
+      if (Restore.empty())
+        Restore.resize(SWI->getNumResultBits());
+      Restore.set(Elt);
+    }
+  }
+}
+
+void ShrinkWrapper::dumpAttributes(unsigned Elt,
+                                   const AttributeMap &Attrs) const {
+  for (const MachineBasicBlock &MBB : MF) {
+    unsigned MBBNum = MBB.getNumber();
+    if (const SCCLoop *C = SI.getSCCLoopFor(MBBNum))
+      if (MBBNum != C->getNumber())
+        continue;
+    const SWAttributes &Attr = Attrs[MBBNum];
+    dbgs() << "BB#" << MBBNum << "<";
+    SWI->printElt(Elt, dbgs());
+    dbgs() << ">"
+           << ":\n\tANTOUT : " << ANTOUT(Attrs, MBBNum, Elt) << '\n'
+           << "\tANTIN : " << Attr.ANTIN.test(Elt) << '\n'
+           << "\tAVIN : " << AVIN(Attrs, MBBNum, Elt) << '\n'
+           << "\tAVOUT : " << Attr.AVOUT.test(Elt) << '\n';
+  }
+}
+
+void ShrinkWrapper::postProcessResults(const BBResultSetMap &OldUses) {
+  // If there is only one use of the element, and multiple saves / restores,
+  // remove them and place the save / restore at the used MBB's boundaries.
+  for (unsigned Elt : AllElts.set_bits()) {
+    // FIXME: ShrinkWrap2: 2x std::find_if.
+    auto HasElt = [&](const TargetResultSet &Res) {
+      return Res.empty() ? false : Res.test(Elt);
+    };
+    auto Found1 = find_if(OldUses, HasElt);
+    auto Found2 = Found1 == OldUses.end()
+                      ? Found1
+                      : std::find_if(std::next(Found1), OldUses.end(), HasElt);
+    if (Found1 != OldUses.end() && Found2 == OldUses.end()) {
+      // Gather all the saves.
+      MBBSet SavesElt(MF.getNumBlockIDs());
+      for (auto &KV : Saves) {
+        unsigned MBBNum = KV.first;
+        const TargetResultSet &Elts = KV.second;
+        if (Elts.test(Elt))
+          SavesElt.set(MBBNum);
+      }
+
+      // Gather all the restores.
+      MBBSet RestoresElt(MF.getNumBlockIDs());
+      for (auto &KV : Restores) {
+        unsigned MBBNum = KV.first;
+        const TargetResultSet &Elts = KV.second;
+        if (Elts.test(Elt))
+          RestoresElt.set(MBBNum);
+      }
+
+      // If we only have a single save and a single restore, keep it that way.
+      if (SavesElt.count() == 1 && RestoresElt.count() == 1)
+        continue;
+
+      // Remove saves and restores from the maps.
+      for (unsigned MBBNum : SavesElt.set_bits())
+        Saves[MBBNum].reset(Elt);
+      for (unsigned MBBNum : RestoresElt.set_bits())
+        Restores[MBBNum].reset(Elt);
+
+      // Add it to the unique block that uses it.
+      unsigned MBBNum = std::distance(OldUses.begin(), Found1);
+      for (auto *Map : {&Saves, &Restores}) {
+        TargetResultSet &Elts = (*Map)[MBBNum];
+        if (Elts.empty())
+          Elts.resize(SWI->getNumResultBits());
+        Elts.set(Elt);
+      }
+    }
+  }
+
+  // Remove all the empty entries from the Saves / Restores maps.
+  // FIXME: ShrinkWrap2: Should we even have empty entries?
+  SmallVector<SparseBBResultSetMap::iterator, 4> ToRemove;
+  for (auto *Map : {&Saves, &Restores}) {
+    for (auto It = Map->begin(), End = Map->end(); It != End; ++It)
+      if (It->second.count() == 0)
+        ToRemove.push_back(It);
+    for (auto It : ToRemove)
+      Map->erase(It);
+    ToRemove.clear();
+  }
+}
+
+unsigned ShrinkWrapper::computeShrinkWrappingCost(
+    MachineBlockFrequencyInfo *MBFI) const {
+  unsigned Cost = 0;
+  for (const MachineBasicBlock &MBB : MF) {
+    unsigned BlockCost = 0;
+    for (auto *Map : {&Saves, &Restores}) {
+      auto Found = Map->find(MBB.getNumber());
+      if (Found != Map->end())
+        BlockCost += Found->second.count();
+    }
+    auto Frequency =
+        static_cast<double>(MBFI->getBlockFreq(&MBB).getFrequency()) /
+        MBFI->getEntryFreq();
+    Cost += BlockCost * Frequency * 100;
+  }
+  return Cost;
+}
+
+unsigned
+ShrinkWrapper::computeDefaultCost(MachineBlockFrequencyInfo *MBFI) const {
+  unsigned Cost = 0;
+  for (const MachineBasicBlock &MBB : MF) {
+    unsigned BlockCost =
+        &MBB == &MF.front() || MBB.isReturnBlock() ? AllElts.count() : 0;
+    auto Frequency =
+        static_cast<double>(MBFI->getBlockFreq(&MBB).getFrequency()) /
+        MBFI->getEntryFreq();
+    Cost += BlockCost * Frequency * 100;
+  }
+  return Cost;
+}
+
+void ShrinkWrapper::verifySavesRestores() const {
+  auto HasElt = [&](const SparseBBResultSetMap &Map, unsigned Elt) {
+    return find_if(Map, [&](const std::pair<unsigned, TargetResultSet> &KV) {
+             return KV.second.test(Elt);
+           }) != Map.end();
+  };
+
+  auto RestoresElt = [&](unsigned Elt) { return HasElt(Restores, Elt); };
+  auto SavesElt = [&](unsigned Elt) { return HasElt(Saves, Elt); };
+
+  // Check that all the CSRs used in the function are saved at least once.
+  for (unsigned Elt : AllElts.set_bits())
+    if (!SavesElt(Elt) && !RestoresElt(Elt))
+      llvm_unreachable("Used CSR is never saved!");
+
+  // Check that there are no saves / restores in a loop.
+  for (const SparseBBResultSetMap *Map : {&Saves, &Restores})
+    for (auto &KV : *Map)
+      if (SI.getSCCLoopFor(KV.first))
+        llvm_unreachable("Save / restore in a loop.");
+
+  // Keep track of the currently saved elements.
+  TargetResultSet Saved(SWI->getNumResultBits());
+  // Cache the state of each call, to avoid redundant checks.
+  std::vector<SmallVector<TargetResultSet, 2>> Cache(MF.getNumBlockIDs());
+
+  // Verify if:
+  // * All the saves are restored.
+  // * All the restores are related to a store.
+  // * There are no nested stores.
+  std::function<void(const MachineBasicBlock *)> verifySavesRestoresRec =
+      [&](const MachineBasicBlock *MBB) {
+        unsigned MBBNum = MBB->getNumber();
+        // Don't even check no-return blocks.
+        if (MBB->succ_empty() && !MBB->isReturnBlock()) {
+          VERBOSE_DEBUG(dbgs() << "IN: BB#" << MBBNum << " is an no-return\n");
+          return;
+        }
+
+        SmallVectorImpl<TargetResultSet> &State = Cache[MBBNum];
+        if (find(State, Saved) != State.end()) {
+          VERBOSE_DEBUG(dbgs() << "IN: BB#" << MBBNum << " already visited.\n");
+          return;
+        }
+
+        State.push_back(Saved);
+
+        VERBOSE_DEBUG(dbgs() << "IN: BB#" << MBBNum << ": Save ";
+                      for (unsigned Elt
+                           : Saved.set_bits()) {
+                        SWI->printElt(Elt, dbgs());
+                        dbgs() << " ";
+                      } dbgs()
+                      << '\n');
+
+        const TargetResultSet &SavesMBB = Saves.lookup(MBBNum);
+        const TargetResultSet &RestoresMBB = Restores.lookup(MBBNum);
+
+        // Get the intersection of the currently saved elements and the
+        // elements to be saved for this basic block. If the intersection is
+        // not empty, it means we have nested saves for the same elements.
+        TargetResultSet Intersection(SavesMBB);
+        Intersection &= Saved;
+
+        DEBUG(for (unsigned Elt
+                   : Intersection.set_bits()) {
+          SWI->printElt(Elt, dbgs());
+          dbgs() << " is saved twice.\n";
+        });
+
+        assert(Intersection.count() == 0 &&
+               "Nested saves for the same elements.");
+        Intersection.reset();
+
+        // Save the elements to be saved.
+        for (unsigned Elt : SavesMBB.set_bits()) {
+          Saved.set(Elt);
+          VERBOSE_DEBUG(dbgs() << "IN: BB#" << MBBNum << ": Save ";
+                        SWI->printElt(Elt, dbgs()); dbgs() << ".\n");
+        }
+
+        // If the intersection of the currently saved elements and the
+        // elements to be restored for this basic block is not equal to the
+        // restores, it means we are trying to restore something that is not
+        // saved.
+        Intersection = RestoresMBB;
+        Intersection &= Saved;
+
+        assert(Intersection.count() == RestoresMBB.count() &&
+               "Not all restores are saved.");
+
+        // Restore the elements to be restored.
+        for (int Elt : RestoresMBB.set_bits()) {
+          Saved.reset(Elt);
+          VERBOSE_DEBUG(dbgs() << "IN: BB#" << MBBNum << ": Restore ";
+                        SWI->printElt(Elt, dbgs()); dbgs() << ".\n");
+        }
+
+        if (MBB->succ_empty() && Saved.count() != 0)
+          llvm_unreachable("Not all saves are restored.");
+
+        // Using the current set of saved elements, walk all the successors
+        // recursively.
+        for (MachineBasicBlock *Succ : MBB->successors())
+          verifySavesRestoresRec(Succ);
+
+        // Restore the state prior of the function exit.
+        for (unsigned Elt : RestoresMBB.set_bits()) {
+          Saved.set(Elt);
+          VERBOSE_DEBUG(dbgs() << "OUT: BB#" << MBBNum << ": Save ";
+                        SWI->printElt(Elt, dbgs()); dbgs() << ".\n");
+        }
+        for (unsigned Elt : SavesMBB.set_bits()) {
+          Saved.reset(Elt);
+          VERBOSE_DEBUG(dbgs() << "OUT: BB#" << MBBNum << ": Restore ";
+                        SWI->printElt(Elt, dbgs()); dbgs() << ".\n");
+        }
+      };
+
+  verifySavesRestoresRec(&MF.front());
+}
+
+void ShrinkWrapper::emitRemarks(MachineOptimizationRemarkEmitter *ORE,
+                                MachineBlockFrequencyInfo *MBFI) const {
+  unsigned Cost = computeShrinkWrappingCost(MBFI);
+  unsigned DefaultCost = computeDefaultCost(MBFI);
+  int Improvement = DefaultCost - Cost;
+  MachineOptimizationRemarkAnalysis R(DEBUG_TYPE, "ShrinkWrapped", {},
+                                      &MF.front());
+  R << "Shrink-wrapped function with cost " << ore::NV("ShrinkWrapCost", Cost)
+    << " which is " << ore::NV("ShrinkWrapCostImprovement", Improvement)
+    << " better than "
+    << ore::NV("OriginalShrinkWrapCost", DefaultCost)
+    << ", during which attributes were recomputed "
+    << ore::NV("ShrinkWrapRecomputed", AttributesRecomputed) << " times.";
+  ORE->emit(R);
+}
+
+bool ShrinkWrapper::areResultsInteresting(
+    MachineBlockFrequencyInfo *MBFI) const {
+  if (!hasUses())
+    return false;
+  if (Saves.size() == 1) { // If we have only one save,
+    unsigned MBBNum = Saves.begin()->first;
+    unsigned FrontMBBNum = MF.front().getNumber();
+    const TargetResultSet &EltsSaved = Saves.begin()->second;
+    if (MBBNum == FrontMBBNum   // and the save it's in the entry block,
+        && EltsSaved == AllElts) { // and it saves *ALL* the CSRs
+      DEBUG(dbgs() << "No shrink-wrapping performed, all saves in the entry "
+                      "block.\n";);
+      return false; // then it's not interesting.
+    }
+  }
+
+  // If the cost with shrink wrapping is better than the default, use it.
+  unsigned Cost = computeShrinkWrappingCost(MBFI);
+  unsigned DefaultCost = computeDefaultCost(MBFI);
+  if (Cost >= DefaultCost)
+    DEBUG(dbgs() << "No shrink-wrapping performed. ShrinkWrapCost: " << Cost
+                 << ", DefaultCost: " << DefaultCost << '\n');
+  return Cost < DefaultCost;
+}
+
+void ShrinkWrapper::dumpResults() const {
+  for (unsigned MBBNum = 0; MBBNum < MF.getNumBlockIDs(); ++MBBNum) {
+    if (Saves.count(MBBNum) || Restores.count(MBBNum)) {
+      DEBUG(dbgs() << "BB#" << MBBNum << ": Saves: ");
+      auto Save = Saves.lookup(MBBNum);
+      DEBUG(for (unsigned Elt
+                 : Save.set_bits()) {
+        SWI->printElt(Elt, dbgs());
+        dbgs() << ", ";
+      });
+      DEBUG(dbgs() << "| Restores: ");
+      auto Restore = Restores.lookup(MBBNum);
+      DEBUG(for (unsigned Elt
+                 : Restore.set_bits()) {
+        SWI->printElt(Elt, dbgs());
+        dbgs() << ", ";
+      });
+
+      DEBUG(dbgs() << '\n');
+    }
+  }
+}
+
+ShrinkWrapper::ShrinkWrapper(const MachineFunction &MF)
+    : ShrinkWrapper(
+          MF,
+          MF.getSubtarget().getFrameLowering()->createCSRShrinkWrapInfo(MF)) {}
+
+ShrinkWrapper::ShrinkWrapper(const MachineFunction &MF,
+                             std::unique_ptr<ShrinkWrapInfo> SW)
+    : MF(MF), Uses(MF.getNumBlockIDs()), SWI(std::move(SW)), SI(MF) {
+  DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');
+
+  if (ViewCFGDebug == cl::BOU_TRUE)
+    MF.viewCFGOnly();
+
+  VERBOSE_DEBUG(for (auto &SCC
+                     : SI.SCCs) {
+    dbgs() << "SCCLoop: " << SCC.getNumber() << "\n  Pred: ";
+    for (auto *Pred : SCC.Predecessors)
+      dbgs() << Pred->getNumber() << ", ";
+    dbgs() << "\n  Succ: ";
+    for (auto *Succ : SCC.Successors)
+      dbgs() << Succ->getNumber() << ", ";
+    dbgs() << '\n';
+  });
+
+  // FIXME: ShrinkWrap2: Remove. Call SWI directly.
+  determineUses();
+  if (!hasUses())
+    return;
+
+  DEBUG(dumpUses());
+
+  // Don't bother saving if we know we're never going to return.
+  removeUsesOnNoReturnPaths();
+  // FIXME: ShrinkWrap2: Check if there are any modifications before printing.
+  DEBUG(dbgs() << "**** After removing uses on no-return paths\n";);
+  DEBUG(dumpUses());
+
+  markUsesOutsideLoops();
+  // FIXME: ShrinkWrap2: Check if there are any modifications before printing.
+  DEBUG(dbgs() << "**** After marking uses inside loops\n";);
+  DEBUG(dumpUses());
+
+  // FIXME: ShrinkWrap2: Find a better way to avoid treating added CSRs the same
+  // as original ones. This is needed for postProcessResults.
+  // FIXME: ShrinkWrap2: Probably just save / restore once per block if there
+  // is only one register from the beginning.
+  auto OldUses = Uses;
+
+  AllElts.resize(SWI->getNumResultBits());
+  for (const auto &Use : Uses)
+    AllElts |= Use;
+
+  auto &EntryUses = Uses[MF.front().getNumber()];
+
+  // Compute the dataflow attributes described by Fred C. Chow.
+  AttributeMap Attrs;
+  // Reserve + emplace_back to avoid copies of empty bitvectors..
+  unsigned Max = MF.getNumBlockIDs();
+  Attrs.reserve(Max);
+  for (unsigned i = 0; i < Max; ++i)
+    Attrs.emplace_back(*SWI);
+  // For each register, compute the dataflow attributes.
+  // FIXME: ShrinkWrap2: Compute all elements at once.
+  ReversePostOrderTraversal<const MachineFunction *> RPOT(&MF);
+  for (unsigned Elt : AllElts.set_bits()) {
+    // If it's used in the entry block, don't even compute it. We know the
+    // results already.
+    if (!EntryUses.empty() && EntryUses.test(Elt))
+      continue;
+    // Compute the attributes.
+    computeAttributes(Elt, Attrs, RPOT);
+
+    // If we detected critical edges, compute again.
+    while (hasCriticalEdges(Elt, Attrs)) {
+      ++AttributesRecomputed;
+      computeAttributes(Elt, Attrs, RPOT);
+    }
+
+    gatherAttributesResults(Elt, Attrs);
+    VERBOSE_DEBUG(dumpResults());
+  }
+
+  VERBOSE_DEBUG(dbgs() << "**** Analysis results\n";);
+  VERBOSE_DEBUG(dumpResults());
+
+  if (!EntryUses.empty()) {
+    Saves[MF.front().getNumber()] |= EntryUses;
+    for (const MachineBasicBlock &MBB : MF) {
+      // FIXME: ShrinkWrap2: EHFuncletEntry.
+      if (MBB.isReturnBlock())
+        Restores[MBB.getNumber()] |= EntryUses;
+    }
+  }
+  postProcessResults(OldUses);
+
+  DEBUG(dbgs() << "**** Shrink-wrapping results\n");
+  // FIXME: ShrinkWrap2: Check if there are any modifications before printing.
+  DEBUG(dumpResults());
+
+// FIXME: ShrinkWrap2: Remove NDEBUG.
+#if !defined(NDEBUG) || defined(EXPENSIVE_CHECKS)
+  verifySavesRestores();
+#endif // EXPENSIVE_CHECKS
+}
Index: lib/CodeGen/TargetPassConfig.cpp
===================================================================
--- lib/CodeGen/TargetPassConfig.cpp
+++ lib/CodeGen/TargetPassConfig.cpp
@@ -39,6 +39,11 @@
 
 using namespace llvm;
 
+// FIXME: ShrinkWrap2: Keep the second one only. Move it from TPC when we
+// decided that ShrinkWrapping is no longer a pass.
+extern cl::opt<cl::boolOrDefault> EnableShrinkWrap2Opt;
+static cl::opt<int> ShrinkWrapPass("shrink-wrap-pass", cl::init(2), cl::Hidden,
+                                   cl::desc("Choose shrink-wrap-pass to use"));
 static cl::opt<bool> DisablePostRASched("disable-post-ra", cl::Hidden,
     cl::desc("Disable Post Regalloc Scheduler"));
 static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden,
@@ -719,8 +724,10 @@
   addPostRegAlloc();
 
   // Insert prolog/epilog code.  Eliminate abstract frame index references...
-  if (getOptLevel() != CodeGenOpt::None)
+  if (getOptLevel() != CodeGenOpt::None && ShrinkWrapPass == 1) {
     addPass(&ShrinkWrapID);
+    EnableShrinkWrap2Opt = cl::BOU_FALSE;
+  }
 
   // Prolog/Epilog inserter needs a TargetMachine to instantiate. But only
   // do so if it hasn't been disabled, substituted, or overridden.
Index: lib/Target/AArch64/AArch64FrameLowering.h
===================================================================
--- lib/Target/AArch64/AArch64FrameLowering.h
+++ lib/Target/AArch64/AArch64FrameLowering.h
@@ -36,6 +36,12 @@
   void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
   void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
 
+  // FIXME: ShrinkWrap2: Delay the computation of NumRegsSpilled.
+  bool
+  assignCalleeSavedSpillSlots(MachineFunction &MF,
+                              const TargetRegisterInfo *TRI,
+                              std::vector<CalleeSavedInfo> &CSI) const override;
+
   bool canUseAsPrologue(const MachineBasicBlock &MBB) const override;
 
   int getFrameIndexReference(const MachineFunction &MF, int FI,
@@ -69,6 +75,16 @@
 
   bool enableStackSlotScavenging(const MachineFunction &MF) const override;
 
+  // FIXME: ShrinkWrap2: We need this to call computeCalleeSaveRegisterParis
+  // before we spill them.
+  void
+  processValidCalleeSavedInfo(MachineFunction &MF,
+                              const TargetRegisterInfo *TRI,
+                              std::vector<CalleeSavedInfo> &CSI) const override;
+
+  std::unique_ptr<ShrinkWrapInfo>
+  createCSRShrinkWrapInfo(const MachineFunction &MF) const override;
+
 private:
   bool shouldCombineCSRLocalStackBump(MachineFunction &MF,
                                       unsigned StackBumpBytes) const;
Index: lib/Target/AArch64/AArch64FrameLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64FrameLowering.cpp
+++ lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -137,6 +137,201 @@
 
 STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
 
+static bool produceCompactUnwindFrame(const MachineFunction &MF);
+static unsigned estimateRSStackSizeLimit(MachineFunction &MF);
+
+class AArch64CSRShrinkWrapInfo final : public ShrinkWrapInfo {
+  /// Number of bits the result needs.
+  unsigned NumCSRs = 0;
+
+public:
+  unsigned getNumResultBits() const override { return NumCSRs; }
+
+  AArch64CSRShrinkWrapInfo(const MachineFunction &MF) : ShrinkWrapInfo(MF) {
+
+    // All calls are tail calls in GHC calling conv, and functions have no
+    // prologue/epilogue.
+    if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
+      return;
+
+    const AArch64RegisterInfo *RegInfo =
+        static_cast<const AArch64RegisterInfo *>(
+            MF.getSubtarget().getRegisterInfo());
+    const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
+    // Count the number of CSRs.
+    for (unsigned i = 0; CSRegs[i]; ++i)
+      ++NumCSRs;
+
+    determineCSRUses();
+
+    // FIXME: ShrinkWrap2: This is a duplicate of determineCalleeSaves. We
+    // should split this into multiple functions, and remove all the side
+    // effects from here.
+    auto AFI =
+        const_cast<AArch64FunctionInfo *>(MF.getInfo<AArch64FunctionInfo>());
+    unsigned UnspilledCSGPR = AArch64::NoRegister;
+    unsigned UnspilledCSGPRIdx = static_cast<unsigned>(-1);
+    unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
+    unsigned UnspilledCSGPRPairedIdx = static_cast<unsigned>(-1);
+
+    // FIXME: ShrinkWrap2: This should be available later somehow.
+    BitVector SavedRegs(getNumResultBits());
+    for (BitVector &BV : Uses)
+      SavedRegs |= BV;
+
+    auto *EntrySaves = &Uses[MF.front().getNumber()];
+    if (EntrySaves->empty())
+      EntrySaves->resize(getNumResultBits());
+
+    const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+    // The frame record needs to be created by saving the appropriate registers
+    if (TFI->hasFP(MF)) {
+      // The frame pointer needs to be used in the entry and return of a
+      // function, to prevent optimizations.
+      EntrySaves->set(AArch64::FP);
+      SavedRegs.set(AArch64::FP);
+      for (const MachineBasicBlock &MBB : MF) {
+        if (MBB.isReturnBlock()) {
+          BitVector &Use = Uses[MBB.getNumber()];
+          if (Use.empty())
+            Use.resize(getNumResultBits());
+          Use.set(AArch64::FP);
+          EntrySaves = &Uses[MF.front().getNumber()];
+        }
+      }
+      // FIXME: ShrinkWrap2: Should we let LR be shrink-wrapped?
+      // EntrySaves.set(AArch64::LR);
+      // SavedRegs.set(AArch64::LR);
+    }
+
+    unsigned BasePointerReg = AArch64::NoRegister;
+    if (RegInfo->hasBasePointer(MF))
+      BasePointerReg = RegInfo->getBaseRegister();
+
+    unsigned ExtraCSSpill = 0;
+    // Figure out which callee-saved registers to save/restore.
+    for (unsigned i = 0; CSRegs[i]; ++i) {
+      const unsigned Reg = CSRegs[i];
+      const unsigned RegIdx = i;
+
+      // Add the base pointer register to SavedRegs if it is callee-save.
+      if (Reg == BasePointerReg) {
+        EntrySaves->set(RegIdx);
+        SavedRegs.set(RegIdx);
+        // FIXME: ShrinkWrap2: gather the return blocks and re-use them.
+        for (const MachineBasicBlock &MBB : MF) {
+          if (MBB.isReturnBlock()) {
+            BitVector &Use = Uses[MBB.getNumber()];
+            if (Use.empty())
+              Use.resize(getNumResultBits());
+            Use.set(RegIdx);
+            EntrySaves = &Uses[MF.front().getNumber()];
+          }
+        }
+      }
+
+      bool RegUsed = SavedRegs.test(RegIdx);
+      unsigned PairedReg = CSRegs[i ^ 1];
+      unsigned PairedRegIdx = i ^ 1;
+      if (!RegUsed) {
+        if (AArch64::GPR64RegClass.contains(Reg) &&
+            !RegInfo->isReservedReg(MF, Reg)) {
+          UnspilledCSGPR = Reg;
+          UnspilledCSGPRIdx = RegIdx;
+          UnspilledCSGPRPaired = PairedReg;
+          UnspilledCSGPRPairedIdx = PairedRegIdx;
+        }
+        continue;
+      }
+
+      // MachO's compact unwind format relies on all registers being stored in
+      // pairs.
+      // FIXME: the usual format is actually better if unwinding isn't needed.
+      // FIXME: ShrinkWrap2: don't check if the paired register is saved if it's
+      // not a callee save. This can happen if we have an odd number of CSRs
+      // (like MostRegsCC).
+      if (produceCompactUnwindFrame(MF) && PairedRegIdx < NumCSRs &&
+          !SavedRegs.test(PairedRegIdx)) {
+        SavedRegs.set(PairedRegIdx);
+        if (AArch64::GPR64RegClass.contains(PairedReg) &&
+            !RegInfo->isReservedReg(MF, PairedReg))
+          ExtraCSSpill = PairedReg;
+      }
+    }
+
+    DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:";
+          for (int RegIdx
+               : SavedRegs.set_bits()) dbgs()
+          << ' ' << PrintReg(CSRegs[RegIdx], RegInfo);
+          dbgs() << "\n";);
+
+    // If any callee-saved registers are used, the frame cannot be eliminated.
+    unsigned NumRegsSpilled = SavedRegs.count();
+    bool CanEliminateFrame = NumRegsSpilled == 0;
+
+    // The CSR spill slots have not been allocated yet, so estimateStackSize
+    // won't include them.
+    MachineFrameInfo &MFI = const_cast<MachineFrameInfo &>(MF.getFrameInfo());
+    unsigned CFSize = MFI.estimateStackSize(MF) + 8 * NumRegsSpilled;
+    DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
+    unsigned EstimatedStackSizeLimit =
+        estimateRSStackSizeLimit(const_cast<MachineFunction &>(MF));
+    bool BigStack = (CFSize > EstimatedStackSizeLimit);
+    if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
+      AFI->setHasStackFrame(true);
+
+    // Estimate if we might need to scavenge a register at some point in order
+    // to materialize a stack offset. If so, either spill one additional
+    // callee-saved register or reserve a special spill slot to facilitate
+    // register scavenging. If we already spilled an extra callee-saved register
+    // above to keep the number of spills even, we don't need to do anything
+    // else here.
+    if (BigStack) {
+      if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) {
+        DEBUG(dbgs() << "Spilling " << PrintReg(UnspilledCSGPR, RegInfo)
+                     << " to get a scratch register.\n");
+        EntrySaves->set(UnspilledCSGPRIdx);
+        // FIXME: ShrinkWrap2: Mark it in the return blocks too.
+        SavedRegs.set(UnspilledCSGPRIdx);
+        // MachO's compact unwind format relies on all registers being stored in
+        // pairs, so if we need to spill one extra for BigStack, then we need to
+        // store the pair.
+        if (produceCompactUnwindFrame(MF)) {
+          EntrySaves->set(UnspilledCSGPRPairedIdx);
+          // FIXME: ShrinkWrap2: Mark it in the return blocks too.
+          SavedRegs.set(UnspilledCSGPRPairedIdx);
+        }
+        ExtraCSSpill = UnspilledCSGPRPaired;
+        NumRegsSpilled = SavedRegs.count();
+      }
+
+      // If we didn't find an extra callee-saved register to spill, create
+      // an emergency spill slot.
+      if (!ExtraCSSpill || MF.getRegInfo().isPhysRegUsed(ExtraCSSpill)) {
+        const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+        const TargetRegisterClass &RC = AArch64::GPR64RegClass;
+        unsigned Size = TRI->getSpillSize(RC);
+        unsigned Align = TRI->getSpillAlignment(RC);
+        int FI = MFI.CreateStackObject(Size, Align, false);
+        // FIXME: ShrinkWrap2: Temporary hack. Remove.
+        MFI.RS->addScavengingFrameIndex(FI);
+        DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
+                     << " as the emergency spill slot.\n");
+      }
+    }
+
+    // Round up to register pair alignment to avoid additional SP adjustment
+    // instructions.
+    AFI->setCalleeSavedStackSize(alignTo(8 * NumRegsSpilled, 16));
+  }
+
+  raw_ostream &printElt(unsigned Elt, raw_ostream &OS) const override {
+    auto &TRI = *MF.getSubtarget().getRegisterInfo();
+    OS << PrintReg(TRI.getCalleeSavedRegs(&MF)[Elt], &TRI);
+    return OS;
+  }
+};
+
 /// Look at each instruction that references stack frames and return the stack
 /// size limit beyond which some of these instructions will require a scratch
 /// register during their expansion later.
@@ -364,6 +559,7 @@
 // Convert callee-save register save/restore instruction to do stack pointer
 // decrement/increment to allocate/deallocate the callee-save stack area by
 // converting store/load to use pre/post increment version.
+LLVM_ATTRIBUTE_USED // FIXME: ShrinkWrap2: Remove attribute when we reuse this.
 static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
     const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc) {
@@ -434,6 +630,8 @@
 static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
                                               unsigned LocalStackSize) {
   unsigned Opc = MI.getOpcode();
+  if (Opc == TargetOpcode::CFI_INSTRUCTION)
+    return;
   (void)Opc;
   assert((Opc == AArch64::STPXi || Opc == AArch64::STPDi ||
           Opc == AArch64::STRXui || Opc == AArch64::STRDui ||
@@ -474,6 +672,7 @@
     return;
 
   int NumBytes = (int)MFI.getStackSize();
+  // FIXME: ShrinkWrap2: This is set by determineCalleeSaves. Seems wrong to me.
   if (!AFI->hasStackFrame()) {
     assert(!HasFP && "unexpected function without stack frame but with FP");
 
@@ -502,6 +701,7 @@
     return;
   }
 
+  // FIXME: ShrinkWrap2: This is set by determineCalleeSaves. Seems wrong to me.
   auto CSStackSize = AFI->getCalleeSavedStackSize();
   // All of the remaining stack allocations are for locals.
   AFI->setLocalStackSize(NumBytes - CSStackSize);
@@ -512,8 +712,16 @@
                     MachineInstr::FrameSetup);
     NumBytes = 0;
   } else if (CSStackSize != 0) {
+    // FIXME: ShrinkWrap2: For now, we can't use push / pop for save / restore
+    // of CSR.
+    if (MFI.getShouldUseShrinkWrap2()) {
+      emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -CSStackSize,
+                      TII, MachineInstr::FrameSetup);
+    }
+    else {
     MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(MBB, MBBI, DL, TII,
                                                      -CSStackSize);
+    }
     NumBytes -= CSStackSize;
   }
   assert(NumBytes >= 0 && "Negative stack allocation size!?");
@@ -527,6 +735,13 @@
       fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize());
     ++MBBI;
   }
+  if (CombineSPBump &&
+      (MFI.getShouldUseShrinkWrap2() || MFI.getShouldUseStackShrinkWrap2())) {
+    for (MachineOperand *MO : AFI->getCSROffsetsToFix()) {
+      MachineInstr &MI = *MO->getParent();
+      fixupCalleeSaveRestoreStackOffset(MI, AFI->getLocalStackSize());
+    }
+  }
   if (HasFP) {
     // Only set up FP if we actually need to. Frame pointer is fp = sp - 16.
     int FPOffset = CSStackSize - 16;
@@ -552,13 +767,30 @@
     }
 
     // If we're a leaf function, try using the red zone.
-    if (!canUseRedZone(MF))
+    if (!canUseRedZone(MF)) {
       // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
       // the correct value here, as NumBytes also includes padding bytes,
       // which shouldn't be counted here.
       emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
                       MachineInstr::FrameSetup);
 
+      // FIXME: ShrinkWrap2: If we have another stack allocation here, and we're
+      // using SP for all the non-entry/non-return blocks, we have to fixup our
+      // offsets emitted for the callee saved regs. The ideal would be to know
+      // if we have this extra local stack allocation when computing the
+      // offsets, but that information is not available yet at that point.
+
+      // Another solution would be to actually use the FI operands as all the
+      // targets do, and let resolveFrameIndex do the job.
+      for (MachineOperand *MO : AFI->getCSROffsetsToFix())
+        MO->setImm(MO->getImm() + NumBytes / 8); // This is SP-relative, it only
+                                                 // occurs when we don't have a
+                                                 // stack frame. Which means
+                                                 // that the offset is unsigned
+                                                 // and scaled, so we need to
+                                                 // divide by 8.
+    }
+
     if (NeedsRealignment) {
       const unsigned Alignment = MFI.getMaxAlignment();
       const unsigned NrBitsToZero = countTrailingZeros(Alignment);
@@ -682,6 +914,10 @@
           .setMIFlags(MachineInstr::FrameSetup);
     }
 
+
+    // FIXME: ShrinkWrap2: We emit CFI when we emit the instructions.
+    if (MFI.getShouldUseShrinkWrap2())
+      return;
     // Now emit the moves for whatever callee saved regs we have (including FP,
     // LR if those are saved).
     emitCalleeSavedFrameMoves(MBB, MBBI);
@@ -758,9 +994,13 @@
   auto CSStackSize = AFI->getCalleeSavedStackSize();
   bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
 
+  // FIXME: ShrinkWrap2: For now, we can't use push / pop for save / restore
+  // of CSR.
+  if (!MFI.getShouldUseShrinkWrap2()) {
   if (!CombineSPBump && CSStackSize != 0)
     convertCalleeSaveRestoreToSPPrePostIncDec(
         MBB, std::prev(MBB.getFirstTerminator()), DL, TII, CSStackSize);
+  }
 
   // Move past the restores of the callee-saved registers.
   MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
@@ -782,6 +1022,12 @@
     return;
   }
 
+  // FIXME: ShrinkWrap2: For now, we can't use push / pop for save / restore
+  // of CSR, so we have to restore SP manually.
+  if (MFI.getShouldUseShrinkWrap2()) {
+    emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
+                    CSStackSize, TII, MachineInstr::FrameDestroy);
+  }
   NumBytes -= CSStackSize;
   assert(NumBytes >= 0 && "Negative stack allocation size!?");
 
@@ -908,7 +1154,11 @@
   return getKillRegState(!IsLiveIn);
 }
 
-static bool produceCompactUnwindFrame(MachineFunction &MF) {
+static bool produceCompactUnwindFrame(const MachineFunction &MF) {
+  // FIXME: ShrinkWrap2: Fix compact unwinding.
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  if (MFI.getShouldUseShrinkWrap2())
+    return false;
   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
   AttributeList Attrs = MF.getFunction()->getAttributes();
   return Subtarget.isTargetMachO() &&
@@ -916,22 +1166,6 @@
            Attrs.hasAttrSomewhere(Attribute::SwiftError));
 }
 
-namespace {
-
-struct RegPairInfo {
-  unsigned Reg1 = AArch64::NoRegister;
-  unsigned Reg2 = AArch64::NoRegister;
-  int FrameIdx;
-  int Offset;
-  bool IsGPR;
-
-  RegPairInfo() = default;
-
-  bool isPaired() const { return Reg2 != AArch64::NoRegister; }
-};
-
-} // end anonymous namespace
-
 static void computeCalleeSaveRegisterPairs(
     MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI,
     const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs) {
@@ -946,10 +1180,13 @@
   (void)CC;
   // MachO's compact unwind format relies on all registers being stored in
   // pairs.
+  // FIXME: ShrinkWrap2: Fix compact unwind format.
+  if (!MFI.getShouldUseShrinkWrap2()) {
   assert((!produceCompactUnwindFrame(MF) ||
           CC == CallingConv::PreserveMost ||
           (Count & 1) == 0) &&
          "Odd number of callee-saved regs to spill!");
+  }
   unsigned Offset = AFI->getCalleeSavedStackSize();
 
   for (unsigned i = 0; i < Count; ++i) {
@@ -961,12 +1198,16 @@
     RPI.IsGPR = AArch64::GPR64RegClass.contains(RPI.Reg1);
 
     // Add the next reg to the pair if it is in the same register class.
+    // FIXME: ShrinkWrap2: Creating real pairs during shrink-wrapping may have
+    // double save / restores, that can corrupt registers.
+    if (!MFI.getShouldUseShrinkWrap2()) {
     if (i + 1 < Count) {
       unsigned NextReg = CSI[i + 1].getReg();
       if ((RPI.IsGPR && AArch64::GPR64RegClass.contains(NextReg)) ||
           (!RPI.IsGPR && AArch64::FPR64RegClass.contains(NextReg)))
         RPI.Reg2 = NextReg;
     }
+    }
 
     // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
     // list to come in sorted by frame index so that we can issue the store
@@ -974,22 +1215,29 @@
     //
     // The order of the registers in the list is controlled by
     // getCalleeSavedRegs(), so they will always be in-order, as well.
+    // FIXME: ShrinkWrap2: Make it work with shrink-wrapping.
+    if (!MFI.getShouldUseShrinkWrap2()) {
     assert((!RPI.isPaired() ||
             (CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) &&
            "Out of order callee saved regs!");
+    }
 
     // MachO's compact unwind format relies on all registers being stored in
     // adjacent register pairs.
+    // FIXME: ShrinkWrap2: Fix compact unwind format.
+    if (!MFI.getShouldUseShrinkWrap2()) {
     assert((!produceCompactUnwindFrame(MF) ||
             CC == CallingConv::PreserveMost ||
             (RPI.isPaired() &&
              ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
               RPI.Reg1 + 1 == RPI.Reg2))) &&
            "Callee-save registers not saved as adjacent register pair!");
+    }
 
     RPI.FrameIdx = CSI[i].getFrameIdx();
 
-    if (Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired()) {
+    // FIXME: ShrinkWrap2: We are never using pairs.
+    if (!MFI.getShouldUseShrinkWrap2() && Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired()) {
       // Round up size of non-pair to pair size if we need to pad the
       // callee-save area to ensure 16-byte alignment.
       Offset -= 16;
@@ -1000,6 +1248,13 @@
       Offset -= RPI.isPaired() ? 16 : 8;
     assert(Offset % 8 == 0);
     RPI.Offset = Offset / 8;
+
+    // FIXME: ShrinkWrap2: This is unused through the whole backend. Instead, we
+    // have the RegisterPairInfo.
+    MFI.setObjectSize(RPI.FrameIdx, 8);
+    MFI.setObjectOffset(RPI.FrameIdx, RPI.Offset);
+
+    // FIXME: ShrinkWrap2: Check for out of bounds ofsets for STR/STUR/etc?
     assert((RPI.Offset >= -64 && RPI.Offset <= 63) &&
            "Offset out of bounds for LDP/STP immediate");
 
@@ -1009,17 +1264,36 @@
   }
 }
 
+// FIXME: ShrinkWrap2: We need this here because we have to call
+// computeCalleeSaveRegisterPairs once after frame indices have been assigned.
+void AArch64FrameLowering::processValidCalleeSavedInfo(
+    MachineFunction &MF, const TargetRegisterInfo *TRI,
+    std::vector<CalleeSavedInfo> &CSI) const {
+  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+  computeCalleeSaveRegisterPairs(MF, CSI, TRI, AFI->getRegPairs());
+}
+
 bool AArch64FrameLowering::spillCalleeSavedRegisters(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
     const std::vector<CalleeSavedInfo> &CSI,
     const TargetRegisterInfo *TRI) const {
   MachineFunction &MF = *MBB.getParent();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  MachineModuleInfo &MMI = MF.getMMI();
+  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
-  DebugLoc DL;
-  SmallVector<RegPairInfo, 8> RegPairs;
-
-  computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs);
   const MachineRegisterInfo &MRI = MF.getRegInfo();
+  DebugLoc DL;
+  SmallVectorImpl<RegPairInfo> &RegPairs = AFI->getRegPairs();
+  const TargetSubtargetInfo &STI = MF.getSubtarget();
+  bool needsCFI =
+      MMI.hasDebugInfo() || MF.getFunction()->needsUnwindTableEntry();
+  // FIXME: ShrinkWrap2: We should always use AFI->getRegPairs(), or at least
+  // avoid calling computeCalleeSaveRegisterPair more than once.
+  if (!MFI.getShouldUseShrinkWrap2()) {
+    RegPairs.clear();
+    computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs);
+  }
 
   for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE;
        ++RPII) {
@@ -1028,6 +1302,15 @@
     unsigned Reg2 = RPI.Reg2;
     unsigned StrOpc;
 
+    // FIXME: ShrinkWrap2: Skip all the registers that are not related to this
+    // block.
+    if (MFI.getShouldUseShrinkWrap2()) {
+      if (find_if(CSI, [&](const CalleeSavedInfo &CS) {
+            return CS.getReg() == Reg1;
+          }) == CSI.end())
+        continue;
+    }
+
     // Issue sequence of spills for cs regs.  The first spill may be converted
     // to a pre-decrement store later by emitPrologue if the callee-save stack
     // area allocation can't be combined with the local stack area allocation.
@@ -1050,6 +1333,27 @@
             dbgs() << ", " << RPI.FrameIdx+1;
           dbgs() << ")\n");
 
+    // FIXME: ShrinkWrap2: We need to decide wether to use SP or FP-relative
+    // store / load here. In order to do that, we have several factors:
+    // * If we don't use shrink-wrapping, always use SP.
+    // * If we don't have a frame, always use SP.
+    // * If it's the entry block, do not use SP, because we might have SP
+    // adjustments before / after.
+    // * If we don't have a frame, and we have local variables, and we *have* to
+    // use SP, then we have to keep track of the offsets that are used to store
+    // / load the CSR, and update them during prologue emission, where we have
+    // the information about the local stack size.
+    bool isEntryBlock = &MF.front() == &MBB;
+    bool ShouldUseSP = !hasFP(*MBB.getParent()) || isEntryBlock;
+    int CSStackSize = AFI->getCalleeSavedStackSize();
+    int Imm = -(CSStackSize - 16 - int(RPI.Offset) * 8) / 8;
+    if (MFI.getShouldUseShrinkWrap2() && !ShouldUseSP) {
+      if (StrOpc == AArch64::STRXui)
+        StrOpc = AArch64::STURXi;
+      else if (StrOpc == AArch64::STRDui)
+        StrOpc = AArch64::STURDi;
+      Imm *= 8;
+    }
     MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
     if (!MRI.isReserved(Reg1))
       MBB.addLiveIn(Reg1);
@@ -1061,14 +1365,42 @@
           MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1),
           MachineMemOperand::MOStore, 8, 8));
     }
-    MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))
-        .addReg(AArch64::SP)
-        .addImm(RPI.Offset) // [sp, #offset*8], where factor*8 is implicit
-        .setMIFlag(MachineInstr::FrameSetup);
+    if (MFI.getShouldUseShrinkWrap2()) {
+      if (ShouldUseSP) {
+        MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))
+            .addReg(AArch64::SP)
+            .addImm(RPI.Offset) // [sp, #offset*8], where factor*8 is implicit
+            .setMIFlag(MachineInstr::FrameSetup);
+        MachineInstr *MI = MIB;
+        if (&MBB != &MF.front())
+          AFI->getCSROffsetsToFix().push_back(&MI->getOperand(2));
+      } else {
+        MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))
+            .addReg(AArch64::FP)
+            .addImm(Imm) // [sp, #offset*8], where factor*8 is implicit
+            .setMIFlag(MachineInstr::FrameSetup);
+      }
+    } else {
+      MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))
+          .addReg(AArch64::SP)
+          .addImm(RPI.Offset) // [sp, #offset*8], where factor*8 is implicit
+          .setMIFlag(MachineInstr::FrameSetup);
+    }
     MIB.addMemOperand(MF.getMachineMemOperand(
         MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
         MachineMemOperand::MOStore, 8, 8));
+  if (MFI.getShouldUseShrinkWrap2() && needsCFI) {
+    int64_t Offset = ((-(CSStackSize - 16 - int(RPI.Offset) * 8) / 8) - 2) * 8;
+    const MCRegisterInfo *MCRI = STI.getRegisterInfo();
+    unsigned DwarfReg = MCRI->getDwarfRegNum(Reg1, true);
+    unsigned CFIIndex = MF.addFrameInst(
+        MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
+    BuildMI(MBB, MI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+        .addCFIIndex(CFIIndex)
+        .setMIFlag(MachineInstr::FrameSetup);
   }
+  }
+
   return true;
 }
 
@@ -1077,14 +1409,26 @@
     const std::vector<CalleeSavedInfo> &CSI,
     const TargetRegisterInfo *TRI) const {
   MachineFunction &MF = *MBB.getParent();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
   DebugLoc DL;
-  SmallVector<RegPairInfo, 8> RegPairs;
+  SmallVectorImpl<RegPairInfo> &RegPairs = AFI->getRegPairs();
+  const TargetSubtargetInfo &STI = MF.getSubtarget();
+  const MCRegisterInfo *MRI = STI.getRegisterInfo();
+  MachineModuleInfo &MMI = MF.getMMI();
+  bool needsCFI =
+      MMI.hasDebugInfo() || MF.getFunction()->needsUnwindTableEntry();
 
   if (MI != MBB.end())
     DL = MI->getDebugLoc();
 
-  computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs);
+  // FIXME: ShrinkWrap2: We should always use AFI->getRegPairs(), or at least
+  // avoid  calling computeCalleeSaveRegisterPair more than once.
+  if (!MFI.getShouldUseShrinkWrap2()) {
+    RegPairs.clear();
+    computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs);
+  }
 
   for (auto RPII = RegPairs.begin(), RPIE = RegPairs.end(); RPII != RPIE;
        ++RPII) {
@@ -1092,6 +1436,15 @@
     unsigned Reg1 = RPI.Reg1;
     unsigned Reg2 = RPI.Reg2;
 
+    // FIXME: ShrinkWrap2: Skip all the registers that are not related to this
+    // block.
+    if (MFI.getShouldUseShrinkWrap2()) {
+      if (find_if(CSI, [&](const CalleeSavedInfo &CS) {
+            return CS.getReg() == Reg1;
+          }) == CSI.end())
+        continue;
+    }
+
     // Issue sequence of restores for cs regs. The last restore may be converted
     // to a post-increment load later by emitEpilogue if the callee-save stack
     // area allocation can't be combined with the local stack area allocation.
@@ -1113,6 +1466,19 @@
             dbgs() << ", " << RPI.FrameIdx+1;
           dbgs() << ")\n");
 
+    // FIXME: ShrinkWrap2: See comment in spillCalleeSavedRegisters.
+    bool isReturnBlock = MBB.isReturnBlock();
+    bool ShouldUseSP = !hasFP(*MBB.getParent()) || isReturnBlock;
+    int CSStackSize = AFI->getCalleeSavedStackSize();
+    int Imm = -(CSStackSize - 16 - int(RPI.Offset) * 8) / 8;
+    if (MFI.getShouldUseShrinkWrap2() && !ShouldUseSP) {
+      if (LdrOpc == AArch64::LDRXui)
+        LdrOpc = AArch64::LDURXi;
+      else if (LdrOpc == AArch64::LDRDui)
+        LdrOpc = AArch64::LDURDi;
+      Imm *= 8;
+    }
+
     MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc));
     if (RPI.isPaired()) {
       MIB.addReg(Reg2, getDefRegState(true));
@@ -1120,13 +1486,41 @@
           MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1),
           MachineMemOperand::MOLoad, 8, 8));
     }
-    MIB.addReg(Reg1, getDefRegState(true))
-        .addReg(AArch64::SP)
-        .addImm(RPI.Offset) // [sp, #offset*8] where the factor*8 is implicit
-        .setMIFlag(MachineInstr::FrameDestroy);
+    if (MFI.getShouldUseShrinkWrap2()) {
+      if (ShouldUseSP) {
+        MIB.addReg(Reg1, getDefRegState(true))
+            .addReg(AArch64::SP)
+            .addImm(
+                RPI.Offset) // [sp, #offset*8] where the factor*8 is implicit
+            .setMIFlag(MachineInstr::FrameDestroy);
+        MachineInstr *MI = MIB;
+        if (!MBB.isReturnBlock())
+          AFI->getCSROffsetsToFix().push_back(&MI->getOperand(2));
+      } else {
+        MIB.addReg(Reg1, getDefRegState(true))
+            .addReg(AArch64::FP)
+            .addImm(Imm) // [sp, #offset*8], where factor*8 is implicit
+            .setMIFlag(MachineInstr::FrameDestroy);
+      }
+    } else {
+      MIB.addReg(Reg1, getDefRegState(true))
+          .addReg(AArch64::SP)
+          .addImm(RPI.Offset) // [sp, #offset*8] where the factor*8 is implicit
+          .setMIFlag(MachineInstr::FrameDestroy);
+    }
+
     MIB.addMemOperand(MF.getMachineMemOperand(
         MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
         MachineMemOperand::MOLoad, 8, 8));
+
+    if (MFI.getShouldUseShrinkWrap2() && needsCFI) {
+      unsigned DwarfReg = MRI->getDwarfRegNum(Reg1, true);
+      unsigned CFIIndex =
+          MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, DwarfReg));
+      BuildMI(MBB, MI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+          .addCFIIndex(CFIIndex)
+          .setMIFlag(MachineInstr::FrameDestroy);
+  }
   }
   return true;
 }
@@ -1243,7 +1637,17 @@
 
   // Round up to register pair alignment to avoid additional SP adjustment
   // instructions.
+}
+
+bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
+    MachineFunction &MF, const TargetRegisterInfo *TRI,
+    std::vector<CalleeSavedInfo> &CSI) const {
+  // FIXME: ShrinkWrap2: This is only a hack to delay the computation of
+  // NumRegsSpilled.
+  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+  unsigned NumRegsSpilled = CSI.size();
   AFI->setCalleeSavedStackSize(alignTo(8 * NumRegsSpilled, 16));
+  return false;
 }
 
 bool AArch64FrameLowering::enableStackSlotScavenging(
@@ -1251,3 +1655,8 @@
   const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
   return AFI->hasCalleeSaveStackFreeSpace();
 }
+
+std::unique_ptr<ShrinkWrapInfo>
+AArch64FrameLowering::createCSRShrinkWrapInfo(const MachineFunction &MF) const {
+  return llvm::make_unique<AArch64CSRShrinkWrapInfo>(MF);
+}
Index: lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
===================================================================
--- lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -637,6 +637,8 @@
 AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
                                       MachineBasicBlock::iterator Paired,
                                       const LdStPairFlags &Flags) {
+  // FIXME: ShrinkWrap2: Add optimization remarks to see when we miss forming a
+  // pair.
   MachineBasicBlock::iterator NextI = I;
   ++NextI;
   // If NextI is the second of the two instructions to be merged, we need
Index: lib/Target/AArch64/AArch64MachineFunctionInfo.h
===================================================================
--- lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -23,6 +23,21 @@
 
 namespace llvm {
 
+class MachineOperand;
+
+/// This contains register pairs computed for callee-save saves / restores.
+struct RegPairInfo {
+  unsigned Reg1 = AArch64::NoRegister;
+  unsigned Reg2 = AArch64::NoRegister;
+  int FrameIdx;
+  int Offset;
+  bool IsGPR;
+
+  RegPairInfo() = default;
+
+  bool isPaired() const { return Reg2 != llvm::AArch64::NoRegister; }
+};
+
 /// AArch64FunctionInfo - This class is derived from MachineFunctionInfo and
 /// contains private AArch64-specific information for each MachineFunction.
 class AArch64FunctionInfo final : public MachineFunctionInfo {
@@ -44,6 +59,7 @@
 
   /// HasStackFrame - True if this function has a stack frame. Set by
   /// determineCalleeSaves().
+  // FIXME: ShrinkWrap2: This should not be set in determineCalleeSaves...
   bool HasStackFrame = false;
 
   /// \brief Amount of stack frame size, not including callee-saved registers.
@@ -88,6 +104,17 @@
   /// other stack allocations.
   bool CalleeSaveStackHasFreeSpace = false;
 
+  // FIXME: ShrinkWrap2: This should be replaced with MFI.Objects.
+  /// Register pairs computed for CSR save / restore.
+  SmallVector<RegPairInfo, 8> RegPairs;
+
+  // FIXME: ShrinkWrap2: The offsets that probably need to be fixed are
+  // collected during spillCalleeSavedRegisters but need to be fixed during
+  // emitPrologue.
+  /// Machine operands representing SP-related offsets to CSRs, that need to be
+  /// fixed if local stack allocation happens afterwards.
+  SmallVector<MachineOperand*, 8> CSROffsetsToFix;
+
 public:
   AArch64FunctionInfo() = default;
 
@@ -116,6 +143,11 @@
     CalleeSaveStackHasFreeSpace = s;
   }
 
+  SmallVectorImpl<RegPairInfo> &getRegPairs() { return RegPairs; }
+  SmallVectorImpl<MachineOperand *> &getCSROffsetsToFix() {
+    return CSROffsetsToFix;
+  }
+
   bool isSplitCSR() const { return IsSplitCSR; }
   void setIsSplitCSR(bool s) { IsSplitCSR = s; }
 
Index: lib/Target/X86/X86FrameLowering.h
===================================================================
--- lib/Target/X86/X86FrameLowering.h
+++ lib/Target/X86/X86FrameLowering.h
@@ -177,6 +177,9 @@
                               MachineBasicBlock::iterator MBBI,
                               const DebugLoc &DL, bool RestoreSP = false) const;
 
+  std::unique_ptr<ShrinkWrapInfo>
+  createCSRShrinkWrapInfo(const MachineFunction &MF) const override;
+
 private:
   uint64_t calculateMaxStackAlign(const MachineFunction &MF) const;
 
Index: lib/Target/X86/X86FrameLowering.cpp
===================================================================
--- lib/Target/X86/X86FrameLowering.cpp
+++ lib/Target/X86/X86FrameLowering.cpp
@@ -35,6 +35,92 @@
 
 using namespace llvm;
 
+class X86CSRShrinkWrapInfo final : public ShrinkWrapInfo {
+  /// Number of bits the result needs.
+  unsigned NumCSRs = 0;
+public:
+  unsigned getNumResultBits() const override { return NumCSRs; }
+
+  X86CSRShrinkWrapInfo(const MachineFunction &MF) : ShrinkWrapInfo(MF) {
+    bool Is64Bit = MF.getSubtarget<X86Subtarget>().is64Bit();
+    auto TRI = static_cast<const X86RegisterInfo *>(
+        MF.getSubtarget().getRegisterInfo());
+    const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
+    unsigned BasePtrIndex = static_cast<unsigned>(-1);
+    unsigned RBPIndex = static_cast<unsigned>(-1);
+    // Count the number of CSRs.
+    unsigned BasePtr = TRI->getBaseRegister();
+    if (Is64Bit && BasePtr == X86::EBX)
+      BasePtr = X86::RBX;
+    unsigned FramePtr = TRI->getFramePtr();
+    if (Is64Bit && FramePtr == X86::EBP)
+      FramePtr = X86::RBP;
+    // FIXME: ShrinkWrap2: Fix HHVM, which has only R12 as a CSR.
+    for (unsigned i = 0; CSRegs[i]; ++i) {
+      if (CSRegs[i] == FramePtr)
+        RBPIndex = i;
+      else if (CSRegs[i] == BasePtr)
+        BasePtrIndex = i;
+      ++NumCSRs;
+    }
+
+    determineCSRUses();
+
+    // FIXME: ShrinkWrap2: const_cast
+    MachineFrameInfo &MFI = const_cast<MachineFrameInfo &>(MF.getFrameInfo());
+
+    // FIXME: ShrinkWrap2: This is a copy of the code in determineCalleeSaves.
+    // It also feels like there should not be any side effects done here.
+    // FIXME: ShrinkWrap2: const_cast
+    auto X86FI = const_cast<X86MachineFunctionInfo *>(
+        MF.getInfo<X86MachineFunctionInfo>());
+    int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+    auto SlotSize = TRI->getSlotSize();
+
+    if (TailCallReturnAddrDelta < 0) {
+      // create RETURNADDR area
+      //   arg
+      //   arg
+      //   RETADDR
+      //   { ...
+      //     RETADDR area
+      //     ...
+      //   }
+      //   [EBP]
+      MFI.CreateFixedObject(-TailCallReturnAddrDelta,
+                            TailCallReturnAddrDelta - SlotSize, true);
+    }
+
+    // Spill the BasePtr if it's used.
+    if (TRI->hasBasePointer(MF)) {
+      auto &SavedRegs = Uses[MF.front().getNumber()];
+      if (SavedRegs.empty())
+        SavedRegs.resize(getNumResultBits());
+      SavedRegs.set(BasePtrIndex);
+
+      // Allocate a spill slot for EBP if we have a base pointer and EH
+      // funclets.
+      if (MF.hasEHFunclets()) {
+        int FI = MFI.CreateSpillStackObject(SlotSize, SlotSize);
+        X86FI->setHasSEHFramePtrSave(true);
+        X86FI->setSEHFramePtrSaveIndex(FI);
+      }
+    }
+
+    // X86FrameLowering::EmitPrologue spills RBP manually. Remove it from the
+    // uses.
+    for (BitVector &BV : Uses)
+      if (!BV.empty())
+        BV.reset(RBPIndex);
+  }
+
+  raw_ostream &printElt(unsigned Elt, raw_ostream &OS) const override {
+    auto &TRI = *MF.getSubtarget().getRegisterInfo();
+    OS << PrintReg(TRI.getCalleeSavedRegs(&MF)[Elt], &TRI);
+    return OS;
+  }
+};
+
 X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,
                                    unsigned StackAlignOverride)
     : TargetFrameLowering(StackGrowsDown, StackAlignOverride,
@@ -1070,7 +1156,12 @@
     if (X86FI->getRestoreBasePointer())
       FrameSize += SlotSize;
 
-    NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
+    NumBytes = FrameSize;
+    // FIXME: ShrinkWrap2: Since we disabled the push / pop spilling, we now
+    // have to include the callee saves in our frame size, so that our sp
+    // displacement can be updated properly.
+    if (!MFI.getShouldUseShrinkWrap2())
+      NumBytes -= X86FI->getCalleeSavedFrameSize();
 
     // Callee-saved registers are pushed on stack before the stack is realigned.
     if (TRI->needsStackRealignment(MF) && !IsWin64Prologue)
@@ -1128,7 +1219,12 @@
     }
   } else {
     assert(!IsFunclet && "funclets without FPs not yet implemented");
-    NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
+    NumBytes = StackSize;
+    // FIXME: ShrinkWrap2: Since we disabled the push / pop spilling, we now
+    // have to include the callee saves in our frame size, so that our sp
+    // displacement can be updated properly.
+    if (!MFI.getShouldUseShrinkWrap2())
+      NumBytes -= X86FI->getCalleeSavedFrameSize();
   }
 
   // For EH funclets, only allocate enough space for outgoing calls. Save the
@@ -1141,6 +1237,10 @@
   bool PushedRegs = false;
   int StackOffset = 2 * stackGrowth;
 
+  // FIXME: Add CFI for all the callee saved registers. Since the saves /
+  // restores are not at the beginning of the function, we need to go through
+  // all the basic blocks.
+
   while (MBBI != MBB.end() &&
          MBBI->getFlag(MachineInstr::FrameSetup) &&
          (MBBI->getOpcode() == X86::PUSH32r ||
@@ -1572,7 +1672,12 @@
   } else if (hasFP(MF)) {
     // Calculate required stack adjustment.
     uint64_t FrameSize = StackSize - SlotSize;
-    NumBytes = FrameSize - CSSize;
+    NumBytes = FrameSize;
+    // FIXME: ShrinkWrap2: Since we disabled the push / pop spilling, we now
+    // have to include the callee saves in our frame size, so that our sp
+    // displacement can be updated properly.
+    if (!MFI.getShouldUseShrinkWrap2())
+      NumBytes -= CSSize;
 
     // Callee-saved registers were pushed on stack before the stack was
     // realigned.
@@ -1584,7 +1689,13 @@
             TII.get(Is64Bit ? X86::POP64r : X86::POP32r), MachineFramePtr)
         .setMIFlag(MachineInstr::FrameDestroy);
   } else {
-    NumBytes = StackSize - CSSize;
+    NumBytes = StackSize;
+    // FIXME: ShrinkWrap2: Since we disabled the push / pop spilling, we now
+    // have to include the callee saves in our frame size, so that our sp
+    // displacement can be updated properly.
+    if (!MFI.getShouldUseShrinkWrap2())
+      NumBytes -= CSSize;
+
   }
   uint64_t SEHStackAllocAmt = NumBytes;
 
@@ -1645,6 +1756,12 @@
     unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt);
     uint64_t LEAAmount =
         IsWin64Prologue ? SEHStackAllocAmt - SEHFrameOffset : -CSSize;
+    // FIXME: ShrinkWrap2: Here, we can't assume we are going to pop all the
+    // callee saves (because we aren't, we actually move them back, then adjust
+    // the stack), so we just want to restore the stack pointer. This should go
+    // away at some point...
+    if (MFI.getShouldUseShrinkWrap2())
+      LEAAmount = 0;
 
     // There are only two legal forms of epilogue:
     // - add SEHAllocationSize, %rsp
@@ -1937,6 +2054,11 @@
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
     const std::vector<CalleeSavedInfo> &CSI,
     const TargetRegisterInfo *TRI) const {
+  // FIXME: ShrinkWrap2: Save using this function when it's adapted to work
+  // without push / pop.
+  if (MBB.getParent()->getFrameInfo().getShouldUseShrinkWrap2())
+    return false;
+
   DebugLoc DL = MBB.findDebugLoc(MI);
 
   // Don't save CSRs in 32-bit EH funclets. The caller saves EBX, EBP, ESI, EDI
@@ -2003,6 +2125,11 @@
                                                MachineBasicBlock::iterator MI,
                                         const std::vector<CalleeSavedInfo> &CSI,
                                           const TargetRegisterInfo *TRI) const {
+  // FIXME: ShrinkWrap2: Restore using this function when it's adapted to work
+  // without push / pop.
+  if (MBB.getParent()->getFrameInfo().getShouldUseShrinkWrap2())
+    return false;
+
   if (CSI.empty())
     return false;
 
@@ -3039,3 +3166,8 @@
                     UnwindHelpFI)
       .addImm(-2);
 }
+
+std::unique_ptr<ShrinkWrapInfo>
+X86FrameLowering::createCSRShrinkWrapInfo(const MachineFunction &MF) const {
+  return llvm::make_unique<X86CSRShrinkWrapInfo>(MF);
+}
Index: test/CodeGen/AArch64/ShrinkWrapping/AliasInRegMask.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/ShrinkWrapping/AliasInRegMask.mir
@@ -0,0 +1,26 @@
+# RUN: llc -mtriple=aarch64-- -run-pass prologepilog -debug-only=shrink-wrap2 %s -o /dev/null 2>&1 | FileCheck %s
+# REQUIRE: asserts
+--- |
+  declare void @f0() nounwind
+  define void @f1() nounwind { ret void }
+...
+---
+name:            f1
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    successors: %bb.2, %bb.1
+
+    CBNZW %wzr, %bb.2
+    B %bb.1
+
+  bb.1:
+    TCRETURNdi @f0, 0, csr_aarch64_aapcs, implicit %sp
+
+  bb.2:
+    RET_ReallyLR
+...
+# Check that we don't look for aliased regs in RegMasks.
+
+# CHECK-LABEL: f1
+# CHECK-NOT: Uses:
Index: test/CodeGen/AArch64/ShrinkWrapping/CFIStackFrame.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/ShrinkWrapping/CFIStackFrame.mir
@@ -0,0 +1,28 @@
+# RUN: llc -filetype obj -mtriple=arm64-apple-ios10.3.0 -run-pass=prologepilog -debug-only=shrink-wrap2 %s -o /dev/null 2>&1 | FileCheck %s
+# REQUIRES: asserts
+--- |
+  declare void @f0() nounwind
+  define void @f1() nounwind { ret void }
+...
+---
+name:            f1
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    successors: %bb.1, %bb.2
+
+    CBNZW %wzr, %bb.2
+    B %bb.1
+
+  bb.1:
+    successors: %bb.2
+
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead %sp, implicit %sp
+    BL @f0, csr_aarch64_aapcs, implicit-def dead %lr, implicit %sp, implicit-def %sp
+    ADJCALLSTACKUP 0, 0, implicit-def dead %sp, implicit %sp
+
+  bb.2:
+    RET_ReallyLR
+...
+# CHECK-LABEL: f1
+# CHECK-NOT: Insufficient CFI instructions to define a frame!
Index: test/CodeGen/AArch64/ShrinkWrapping/CSRUsedOnTerminator.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/ShrinkWrapping/CSRUsedOnTerminator.mir
@@ -0,0 +1,52 @@
+# RUN: llc -mtriple=aarch64-- -run-pass prologepilog -debug-only=shrink-wrap2 %s -o /dev/null 2>&1 | FileCheck %s
+# REQUIRE: asserts
+--- |
+  define void @f0() nounwind { ret void }
+...
+---
+name:            f0
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    successors: %bb.1, %bb.2
+
+    %nzcv = IMPLICIT_DEF
+    Bcc 0, %bb.1, implicit killed %nzcv
+    B %bb.2
+
+  bb.1:
+    RET_ReallyLR
+
+  bb.2:
+    successors: %bb.3, %bb.4
+
+    %x21 = IMPLICIT_DEF
+
+    %nzcv = IMPLICIT_DEF
+    Bcc 0, %bb.3, implicit killed %nzcv
+    B %bb.4
+
+  bb.3:
+    RET_ReallyLR
+
+  bb.4:
+    liveins: %x21
+    successors: %bb.5, %bb.6
+
+    CBZX killed %x21, %bb.5
+    B %bb.6
+
+  bb.5:
+    RET_ReallyLR
+
+  bb.6:
+    RET_ReallyLR
+...
+# Check that we mark uses in terminator instructions as used in all the successors as well.
+
+# CHECK-LABEL: f0
+
+# CHECK: BB#2 uses : %X21
+# CHECK-NEXT: BB#4 uses : %X21
+# CHECK-NEXT: BB#5 uses : %X21
+# CHECK-NEXT: BB#6 uses : %X21
Index: test/CodeGen/AArch64/ShrinkWrapping/CompactUnwindingFPSPPair.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/ShrinkWrapping/CompactUnwindingFPSPPair.mir
@@ -0,0 +1,44 @@
+# RUN: llc -filetype obj -mtriple=arm64-apple-ios10.3.0 -run-pass=prologepilog %s -debug-only=shrink-wrap2 -o - 2>&1 | FileCheck %s
+# REQUIRES: asserts
+--- |
+  define void @f0() nounwind { ret void }
+...
+---
+name:            f0
+tracksRegLiveness: true
+liveins:
+  - { reg: '%x1' }
+body:             |
+  bb.0:
+    successors: %bb.1, %bb.3
+    liveins: %x1
+
+    %x19 = COPY %x1
+    CBNZW %wzr, %bb.3
+    B %bb.1
+
+  bb.1:
+    successors: %bb.2
+    liveins: %x19
+
+
+  bb.2:
+    successors: %bb.2, %bb.3
+    liveins: %x19
+
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead %sp, implicit %sp
+    %x1 = COPY %x19
+    BL @f0, csr_aarch64_aapcs, implicit-def dead %lr, implicit %sp, implicit undef %x0, implicit %x1, implicit-def %sp
+    ADJCALLSTACKUP 0, 0, implicit-def dead %sp, implicit %sp
+    dead %xzr = SUBSXri undef %x8, 8, 0, implicit-def %nzcv
+    Bcc 12, %bb.2, implicit killed %nzcv
+    B %bb.3
+
+  bb.3:
+    RET_ReallyLR
+
+...
+# Check that we're not trying to produce compact unwinding when FP and LR are split.
+
+# CHECK-LABEL: f0
+# CHECK-NOT: Pushing invalid registers for frame!
Index: test/CodeGen/AArch64/ShrinkWrapping/DetermineCalleeSavesSideEffects.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/ShrinkWrapping/DetermineCalleeSavesSideEffects.mir
@@ -0,0 +1,37 @@
+# RUN: llc -march=aarch64 -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false -run-pass=prologepilog -debug-only=shrink-wrap2 %s -o /dev/null 2>&1 | FileCheck %s
+# REQUIRE: asserts
+--- |
+  declare void @f1() #0
+  define void @f0() #1 { ret void }
+
+  attributes #0 = { nounwind "target-cpu"="cortex-a57" }
+  attributes #1 = { nounwind "no-frame-pointer-elim-non-leaf" "target-cpu"="cortex-a57" }
+
+...
+---
+name:            f0
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    successors: %bb.1, %bb.2
+
+    CBNZW %wzr, %bb.2
+    B %bb.1
+
+  bb.1:
+    successors: %bb.2
+
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead %sp, implicit %sp
+    BL @f1, csr_aarch64_aapcs, implicit-def dead %lr, implicit %sp, implicit-def %sp
+    ADJCALLSTACKUP 0, 0, implicit-def dead %sp, implicit %sp
+
+  bb.2:
+    RET_ReallyLR
+...
+# Check that while we look for CSRs, we set the appropriate internal state of AArch64MachineFunction.
+
+# CHECK-LABEL: f0
+# CHECK-NOT: unexpected function without stacke frame but with FP
+# CHECK: BB#1 uses : %LR
+# CHECK: **** Shrink-wrapping results
+# CHECK-NEXT: BB#1: Saves: %LR, | Restores: %LR,
Index: test/CodeGen/AArch64/ShrinkWrapping/FirstMBBNum2.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/ShrinkWrapping/FirstMBBNum2.ll
@@ -0,0 +1,18 @@
+; RUN: llc -mtriple=aarch64-- -O0 -global-isel -global-isel-abort=0 -verify-machineinstrs -enable-shrink-wrap2=true -debug-only=shrink-wrap2 %s -o - 2>&1 | FileCheck %s
+; FIXME: ShrinkWrap2: use MIR once we fix stack protector assert.
+; REQUIRES: asserts
+; This test causes the first MBB ID to be 2, which provoked a bug.
+
+; CHECK-LABEL: ABIi128
+
+; CHECK: BB#2 uses : %LR
+; CHECK: **** Shrink-wrapping results
+; CHECK-NEXT: BB#2: Saves: %LR, | Restores: %LR,
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--"
+
+define i128 @ABIi128(i128 %arg1) nounwind {
+  %res = fptoui fp128 undef to i128
+  ret i128 %res
+}
Index: test/CodeGen/AArch64/ShrinkWrapping/NoPostPreLoadStore.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/ShrinkWrapping/NoPostPreLoadStore.mir
@@ -0,0 +1,38 @@
+# RUN: llc -mtriple=arm64-apple-ios -debug-only=shrink-wrap2 -run-pass=prologepilog %s -o /dev/null 2>&1 | FileCheck %s
+# REQUIRE: asserts
+
+--- |
+  define void @f0() nounwind { ret void }
+  declare void @f1() nounwind
+  declare void @f2() nounwind
+...
+---
+name:            f0
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    successors: %bb.1, %bb.2
+
+    CBNZW %wzr, %bb.2
+    B %bb.1
+
+  bb.1:
+    successors: %bb.2
+
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead %sp, implicit %sp
+    BL @f1, csr_aarch64_aapcs, implicit-def dead %lr, implicit %sp, implicit-def %sp
+    ADJCALLSTACKUP 0, 0, implicit-def dead %sp, implicit %sp
+
+  bb.2:
+    TCRETURNdi @f2, 0, csr_aarch64_aapcs, implicit %sp
+
+...
+
+# This test makes sure that we don't convert callee save save / restores from
+# store / load to pre / post increment load store.
+
+# CHECK-LABEL: f0
+# CHECK-NOT: This is not a register operand
+# CHECK: BB#1 uses : %LR
+# CHECK: **** Shrink-wrapping results
+# CHECK-NEXT: BB#1: Saves: %LR, | Restores: %LR,
Index: test/CodeGen/AArch64/ShrinkWrapping/NoStackObjects.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/ShrinkWrapping/NoStackObjects.mir
@@ -0,0 +1,53 @@
+# RUN: llc -filetype obj -mtriple=arm64-apple-ios10.3.0 -run-pass=prologepilog -debug-only=shrink-wrap2 %s -o /dev/null 2>&1 | FileCheck %s
+# REQUIRES: asserts
+--- |
+  define void @f0() nounwind { entry: ret void }
+  declare void @f1()
+...
+---
+name:            f0
+tracksRegLiveness: true
+liveins:
+  - { reg: '%d0' }
+  - { reg: '%d1' }
+body:             |
+  bb.0:
+    successors: %bb.2, %bb.1
+    liveins: %d0, %d1
+
+    dead %wzr = SUBSWri undef %w8, 0, 0, implicit-def %nzcv
+    Bcc 12, %bb.2, implicit killed %nzcv
+    B %bb.1
+
+  bb.1:
+    successors: %bb.4, %bb.3
+    liveins: %d0, %d1
+
+    CBNZW %wzr, %bb.4
+    B %bb.3
+
+  bb.2:
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead %sp, implicit %sp
+    %x3 = COPY %sp
+    BL @f1, csr_aarch64_aapcs_thisreturn, implicit-def dead %lr, implicit %sp, implicit undef %x0, implicit undef %x1, implicit undef %x2, implicit killed %x3, implicit-def %sp
+    ADJCALLSTACKUP 0, 0, implicit-def dead %sp, implicit %sp
+
+  bb.3:
+    successors: %bb.4
+    liveins: %d0, %d1
+
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead %sp, implicit %sp
+    %x3 = COPY %sp
+    %w4 = MOVi32imm 70
+    %w5 = COPY %wzr
+    BL @f1, csr_aarch64_aapcs_thisreturn, implicit-def dead %lr, implicit %sp, implicit undef %x0, implicit %d0, implicit %d1, implicit undef %x1, implicit undef %x2, implicit killed %x3, implicit undef %d2, implicit killed %w4, implicit killed %w5, implicit-def %sp
+    ADJCALLSTACKUP 0, 0, implicit-def dead %sp, implicit %sp
+
+  bb.4:
+    %w0 = MOVi32imm 1
+    RET_ReallyLR implicit killed %w0
+...
+# Check that we don't use the stack objects in the AArch64 backend.
+
+# CHECK-LABEL: f0
+# CHECK-NOT: Getting frame offset for a dead object?
Index: test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
===================================================================
--- test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
+++ test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
@@ -1,5 +1,6 @@
 ; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -disable-post-ra < %s | FileCheck %s
 ; RUN: llc -verify-machineinstrs -mtriple=arm64-apple-ios -disable-fp-elim -disable-post-ra < %s | FileCheck %s --check-prefix=CHECK-MACHO
+; XFAIL: *
 
 ; This test aims to check basic correctness of frame layout &
 ; frame access code. There are 8 functions in this test file,
@@ -660,6 +661,7 @@
   ret void
 }
 
+; FIXME: ShrinkWrap2: This fails because we don't combine the two sp displacements.
 ; CHECK-LABEL: realign_conditional
 ; No realignment in the prologue.
 ; CHECK-NOT:  and
Index: test/CodeGen/AArch64/alloca.ll
===================================================================
--- test/CodeGen/AArch64/alloca.ll
+++ test/CodeGen/AArch64/alloca.ll
@@ -1,6 +1,9 @@
 ; RUN: llc -mtriple=aarch64-linux-gnu -disable-post-ra -verify-machineinstrs -o - %s | FileCheck %s
 ; RUN: llc -mtriple=arm64-apple-ios -disable-post-ra -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=CHECK-MACHO
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -mattr=-fp-armv8 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-NOFP-ARM64 %s
+; XFAIL: *
+; FIXME: ShrinkWrap2: This fails with shrink-wrapping enabled because we don't
+; care about compact unwinding, and we don't force x20 to be spilled anyway.
 
 declare void @use_addr(i8*)
 
Index: test/CodeGen/AArch64/arm64-aapcs-be.ll
===================================================================
--- test/CodeGen/AArch64/arm64-aapcs-be.ll
+++ test/CodeGen/AArch64/arm64-aapcs-be.ll
@@ -1,5 +1,6 @@
 ; RUN: llc -mtriple=aarch64_be-none-eabi -fast-isel=false < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64_be-none-eabi -fast-isel=true < %s | FileCheck %s
+; XFAIL: This test fails with shrink-wrapping enabled, because with pairs enabled, if we have only one register, it will have a 16B alignment, so we use 32B for the stack instead of just 8B. See computeCalleeSaveRegisterPairs.
 
 ; Check narrow argument passing via stack - callee end
 define i32 @test_narrow_args_callee(i64 %x0, i64 %x1, i64 %x2, i64 %x3, i64 %x4, i64 %x5, i64 %x6, i64 %x7, i8 %c, i16 %s) #0 {
Index: test/CodeGen/AArch64/arm64-abi_align.ll
===================================================================
--- test/CodeGen/AArch64/arm64-abi_align.ll
+++ test/CodeGen/AArch64/arm64-abi_align.ll
@@ -1,5 +1,8 @@
 ; RUN: llc < %s -mtriple=arm64-apple-darwin -mcpu=cyclone -enable-misched=false -disable-fp-elim | FileCheck %s
 ; RUN: llc < %s -mtriple=arm64-apple-darwin -O0 -disable-fp-elim | FileCheck -check-prefix=FAST %s
+; XFAIL: *
+; FIXME: ShrinkWrap2: This test fails with shrink-wrapping enabled because we
+; don't combine SP updates.
 
 ; rdar://12648441
 ; Generated from arm64-arguments.c with -O2.
Index: test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll
===================================================================
--- test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll
+++ test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll
@@ -1,4 +1,7 @@
 ; RUN: llc -mtriple=arm64-eabi -mcpu=cyclone < %s | FileCheck %s
+; XFAIL: *
+; FIXME: ShrinkWrap2: This test fails with shrink-wrapping enabled because we
+; don't save LR, since there are no calls.
 
 ; CHECK: foo
 ; CHECK: str w[[REG0:[0-9]+]], [x19, #264]
Index: test/CodeGen/AArch64/arm64-dead-register-def-bug.ll
===================================================================
--- test/CodeGen/AArch64/arm64-dead-register-def-bug.ll
+++ test/CodeGen/AArch64/arm64-dead-register-def-bug.ll
@@ -1,3 +1,4 @@
+; FIXME: ShrinkWrap2: .ll -> .mir when stack protector stuff is fixed.
 ; RUN: llc -mtriple="arm64-apple-ios" < %s | FileCheck %s
 ;
 ; Check that the dead register definition pass is considering implicit defs.
Index: test/CodeGen/AArch64/arm64-fp128.ll
===================================================================
--- test/CodeGen/AArch64/arm64-fp128.ll
+++ test/CodeGen/AArch64/arm64-fp128.ll
@@ -1,4 +1,7 @@
 ; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -mcpu=cyclone -aarch64-enable-atomic-cfg-tidy=0 < %s | FileCheck %s
+; XFAIL: *
+; FIXME: ShrinkWrap2: This test fails with shrink-wrapping enabled because we
+; insert a restore point between a cmp and a jump.
 
 @lhs = global fp128 zeroinitializer, align 16
 @rhs = global fp128 zeroinitializer, align 16
Index: test/CodeGen/AArch64/arm64-hello.ll
===================================================================
--- test/CodeGen/AArch64/arm64-hello.ll
+++ test/CodeGen/AArch64/arm64-hello.ll
@@ -1,5 +1,8 @@
 ; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -disable-post-ra -disable-fp-elim | FileCheck %s
 ; RUN: llc < %s -mtriple=arm64-linux-gnu -disable-post-ra | FileCheck %s --check-prefix=CHECK-LINUX
+; XFAIL: *
+; FIXME: ShrinkWrap2: This test fails with shrink-wrapping because we don't
+; combine SP updates.
 
 ; CHECK-LABEL: main:
 ; CHECK:	sub	sp, sp, #32
Index: test/CodeGen/AArch64/arm64-join-reserved.ll
===================================================================
--- test/CodeGen/AArch64/arm64-join-reserved.ll
+++ test/CodeGen/AArch64/arm64-join-reserved.ll
@@ -1,4 +1,7 @@
 ; RUN: llc < %s -verify-machineinstrs | FileCheck %s
+; XFAIL: *
+; FIXME: ShrinkWrap2: This test fails with shrink-wrapping enabled because we
+; don't spill x29, so we merge the store of x30 with wrz.
 target triple = "arm64-apple-macosx10"
 
 ; Make sure that a store to [sp] addresses off sp directly.
Index: test/CodeGen/AArch64/arm64-large-frame.ll
===================================================================
--- test/CodeGen/AArch64/arm64-large-frame.ll
+++ test/CodeGen/AArch64/arm64-large-frame.ll
@@ -1,4 +1,7 @@
 ; RUN: llc -verify-machineinstrs -mtriple=arm64-none-linux-gnu -disable-fp-elim -disable-post-ra < %s | FileCheck %s
+; XFAIL: *
+; FIXME: ShrinkWrap2: This test fails with shrink-wrapping enabled because we
+; don't save LR.
 declare void @use_addr(i8*)
 
 @addr = global i8* null
Index: test/CodeGen/X86/ShrinkWrapping/BasicBranch.mir
===================================================================
--- /dev/null
+++ test/CodeGen/X86/ShrinkWrapping/BasicBranch.mir
@@ -0,0 +1,37 @@
+# RUN: llc -mtriple=x86_64-- -run-pass=prologepilog -debug-only=shrink-wrap2 %s -o /dev/null 2>&1 | FileCheck %s
+# REQUIRE: asserts
+--- |
+  define void @f0() nounwind { ret void }
+...
+---
+name:            f0
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    successors: %bb.2, %bb.1
+
+    %eflags = IMPLICIT_DEF
+    JE_1 %bb.2, implicit killed %eflags
+    JMP_1 %bb.1
+
+  bb.1:
+    successors: %bb.3
+
+    %rbx = IMPLICIT_DEF
+    JMP_1 %bb.3
+
+  bb.2:
+    RET 0
+
+  bb.3:
+    %rbx = IMPLICIT_DEF
+    RET 0
+...
+# Basic shrink-wrapping example. Early return with uses of CSRs in the body.
+#CHECK-LABEL: f0
+
+#CHECK: BB#1 uses : %RBX
+#CHECK-NEXT: BB#3 uses : %RBX
+#CHECK: **** Shrink-wrapping results
+#CHECK-NEXT: BB#1: Saves: %RBX, | Restores:
+#CHECK-NEXT: BB#3: Saves: | Restores: %RBX,
Index: test/CodeGen/X86/ShrinkWrapping/CriticalEdge.mir
===================================================================
--- /dev/null
+++ test/CodeGen/X86/ShrinkWrapping/CriticalEdge.mir
@@ -0,0 +1,42 @@
+# RUN: llc -march=x86 -run-pass=prologepilog %s -debug-only=shrink-wrap2 -o /dev/null 2>&1 | FileCheck %s
+# REQUIRES: asserts
+# This is a reduced test case from test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll
+--- |
+  define void @f0() nounwind { ret void }
+...
+---
+name:            f0
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    successors: %bb.3
+
+    JMP_1 %bb.3
+
+  bb.1:
+    RET 0
+
+  bb.2:
+    RET 0
+
+  bb.3:
+    successors: %bb.4, %bb.2
+
+    %esi = IMPLICIT_DEF
+
+    %eflags = IMPLICIT_DEF
+    JGE_1 %bb.2, implicit killed %eflags
+    JMP_1 %bb.4
+
+  bb.4:
+    successors:%bb.1, %bb.2
+
+    %eflags = IMPLICIT_DEF
+    JNE_1 %bb.1, implicit killed %eflags
+    JMP_1 %bb.2
+...
+#CHECK-LABEL: f0
+
+#CHECK: BB#3 uses : %ESI
+#CHECK: **** Shrink-wrapping results
+#CHECK-NEXT: BB#3: Saves: %ESI, | Restores: %ESI,
Index: test/CodeGen/X86/ShrinkWrapping/CriticalEdge2.mir
===================================================================
--- /dev/null
+++ test/CodeGen/X86/ShrinkWrapping/CriticalEdge2.mir
@@ -0,0 +1,36 @@
+# RUN: llc -march=x86 -run-pass=prologepilog %s -debug-only=shrink-wrap2 -o /dev/null 2>&1 | FileCheck %s
+# REQUIRES: asserts
+# This is a reduced test case from test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll
+--- |
+  define void @f0() nounwind { ret void }
+...
+---
+name:            f0
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    successors: %bb.4, %bb.2
+
+    %eflags = IMPLICIT_DEF
+    JNE_1 %bb.4, implicit killed %eflags
+    JMP_1 %bb.2
+
+  bb.2:
+    successors: %bb.3, %bb.4
+
+    %ebx = IMPLICIT_DEF
+    %eflags = IMPLICIT_DEF
+    JNE_1 %bb.4, implicit killed %eflags
+    JMP_1 %bb.3
+
+  bb.3:
+    RET 0
+
+  bb.4:
+    RET 0
+...
+#CHECK-LABEL: f0
+
+#CHECK: BB#1 uses : %EBX
+#CHECK: **** Shrink-wrapping results
+#CHECK-NEXT: BB#1: Saves: %EBX, | Restores: %EBX,
Index: test/CodeGen/X86/ShrinkWrapping/CriticalEdgeLoop.mir
===================================================================
--- /dev/null
+++ test/CodeGen/X86/ShrinkWrapping/CriticalEdgeLoop.mir
@@ -0,0 +1,52 @@
+# RUN: llc -mtriple=x86_64-- -run-pass=prologepilog %s -debug-only=shrink-wrap2 -o /dev/null 2>&1 | FileCheck %s
+# REQUIRES: asserts
+# This is a reduced test case from test/CodeGen/X86/2009-04-27-CoalescerAssert.ll
+--- |
+  define void @f0() nounwind { ret void }
+...
+---
+name:            f0
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    successors: %bb.3, %bb.1
+
+    %eflags = IMPLICIT_DEF
+    JNE_1 %bb.3, implicit killed %eflags
+    JMP_1 %bb.1
+
+  bb.1:
+    successors: %bb.4
+
+    JMP_1 %bb.4
+
+  bb.2:
+
+  bb.3:
+    successors: %bb.4, %bb.2
+
+    %eflags = IMPLICIT_DEF
+    JNE_1 %bb.2, implicit killed %eflags
+    JMP_1 %bb.4
+
+  bb.4:
+    successors: %bb.6, %bb.5
+
+    %rbx = IMPLICIT_DEF
+
+    %eflags = IMPLICIT_DEF
+    JE_1 %bb.6, implicit killed %eflags
+    JMP_1 %bb.5
+
+  bb.5:
+    RET 0
+
+  bb.6:
+    RET 0
+
+...
+#CHECK-LABEL: f0
+
+#CHECK: BB#4 uses : %RBX
+#CHECK: **** Shrink-wrapping results
+#CHECK-NEXT: BB#4: Saves: %RBX, | Restores: %RBX,
Index: test/CodeGen/X86/ShrinkWrapping/InfiniteLoop.mir
===================================================================
--- /dev/null
+++ test/CodeGen/X86/ShrinkWrapping/InfiniteLoop.mir
@@ -0,0 +1,36 @@
+# RUN: llc -mtriple=x86_64-- -run-pass=prologepilog %s -debug-only=shrink-wrap2 -o /dev/null 2>&1 | FileCheck %s
+# REQUIRES: asserts
+--- |
+  define void @f0() nounwind { ret void }
+...
+---
+name:            f0
+body:             |
+  bb.0:
+    successors: %bb.1, %bb.2
+    liveins: %edi
+
+    %eflags = IMPLICIT_DEF
+    JNE_1 %bb.2, implicit killed %eflags
+    JMP_1 %bb.1
+
+  bb.1:
+    RET 0
+
+  bb.2:
+    successors: %bb.3
+
+    %rbx = IMPLICIT_DEF
+
+  bb.3:
+    successors: %bb.3
+
+    JMP_1 %bb.3
+...
+# Check that we don't save on a branch that never returns.
+#CHECK-LABEL: f0
+
+#CHECK: BB#2 uses : %RBX
+#CHECK-NEXT: Remove uses from no-return BB#2
+#CHECK-NOT: Saves:
+#CHECK-NOT: restores:
Index: test/CodeGen/X86/ShrinkWrapping/IrreducibleCFG.mir
===================================================================
--- /dev/null
+++ test/CodeGen/X86/ShrinkWrapping/IrreducibleCFG.mir
@@ -0,0 +1,81 @@
+# RUN: llc -mtriple=x86_64-- -run-pass=prologepilog %s -debug-only=shrink-wrap2 -o /dev/null 2>&1 | FileCheck %s
+# REQUIRES: asserts
+--- |
+  define void @f0() nounwind { ret void }
+...
+---
+name:            f0
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    successors: %bb.10, %bb.6
+
+    %eflags = IMPLICIT_DEF
+    JNE_1 %bb.10, implicit killed %eflags
+    JMP_1 %bb.6
+
+  bb.1:
+    successors: %bb.6
+
+    JMP_1 %bb.6
+
+  bb.2:
+    successors: %bb.10
+
+    JMP_1 %bb.10
+
+  bb.3:
+    successors: %bb.4
+
+    %ebx = IMPLICIT_DEF
+    JMP_1 %bb.4
+
+  bb.4:
+    successors: %bb.5, %bb.9
+
+    %eflags = IMPLICIT_DEF
+    JNE_1 %bb.9, implicit killed %eflags
+    JMP_1 %bb.5
+
+  bb.5:
+    RET 0
+
+  bb.6:
+    successors: %bb.2, %bb.7
+
+    %eflags = IMPLICIT_DEF
+    JE_1 %bb.2, implicit killed %eflags
+    JMP_1 %bb.7
+
+  bb.7:
+    successors: %bb.4
+
+    JMP_1 %bb.4
+
+  bb.8:
+    successors: %bb.3, %bb.1
+
+    %eflags = IMPLICIT_DEF
+    JNE_1 %bb.3, implicit killed %eflags
+    JMP_1 %bb.1
+
+  bb.9:
+    successors: %bb.4, %bb.8
+
+    %eflags = IMPLICIT_DEF
+    JNE_1 %bb.4, implicit killed %eflags
+    JMP_1 %bb.8
+
+  bb.10:
+    successors: %bb.7
+
+    JMP_1 %bb.7
+
+...
+# Check that we handle irreducible loops and save / restore outside them.
+
+#CHECK-LABEL: f0
+#CHECK: BB#2 uses : %RBX
+#CHECK: **** Shrink-wrapping results
+#CHECK-NEXT: BB#0: Saves: %RBX, | Restores:
+#CHECK-NEXT: BB#5: Saves: | Restores: %RBX,
Index: test/CodeGen/X86/ShrinkWrapping/LoopBasic.mir
===================================================================
--- /dev/null
+++ test/CodeGen/X86/ShrinkWrapping/LoopBasic.mir
@@ -0,0 +1,61 @@
+# RUN: llc -mtriple=x86_64-- -run-pass=prologepilog %s -debug-only=shrink-wrap2 -o /dev/null 2>&1 | FileCheck %s
+# REQUIRES: asserts
+--- |
+  define void @f0() nounwind { ret void }
+...
+---
+name:            f0
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    successors: %bb.2, %bb.1
+
+
+    %eflags = IMPLICIT_DEF
+    JE_1 %bb.2, implicit killed %eflags
+    JMP_1 %bb.1
+
+  bb.1:
+    successors: %bb.3
+
+    JMP_1 %bb.3
+
+  bb.2:
+    RET 0
+
+  bb.3:
+    successors: %bb.4, %bb.5
+
+    %eflags = IMPLICIT_DEF
+    JNE_1 %bb.5, implicit killed %eflags
+    JMP_1 %bb.4
+
+  bb.4:
+    successors: %bb.6
+
+    JMP_1 %bb.6
+
+  bb.5:
+    successors: %bb.6
+
+    %ebx = IMPLICIT_DEF
+    JMP_1 %bb.6
+
+  bb.6:
+    successors: %bb.7, %bb.3
+
+    %eflags = IMPLICIT_DEF
+    JNE_1 %bb.3, implicit killed %eflags
+    JMP_1 %bb.7
+
+  bb.7:
+    RET 0
+...
+# Check that we don't save inside loops.
+
+#CHECK-LABEL: f0
+
+#CHECK: BB#5 uses : %RBX
+#CHECK: **** Shrink-wrapping results
+#CHECK-NEXT: BB#1: Saves: %RBX, | Restores:
+#CHECK-NEXT: BB#7: Saves: | Restores: %RBX,
Index: test/CodeGen/X86/ShrinkWrapping/LoopInCondition.mir
===================================================================
--- /dev/null
+++ test/CodeGen/X86/ShrinkWrapping/LoopInCondition.mir
@@ -0,0 +1,40 @@
+# RUN: llc -march=x86 -run-pass=prologepilog %s -debug-only=shrink-wrap2 -o /dev/null 2>&1 | FileCheck %s
+# REQUIRES: asserts
+# This is a reduced test case from test/CodeGen/X86/2007-11-06-InstrSched.ll.
+--- |
+  define void @f0() nounwind { ret void }
+...
+---
+name:            f0
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    successors: %bb.1, %bb.3
+
+    %eflags = IMPLICIT_DEF
+    JNE_1 %bb.3, implicit killed %eflags
+    JMP_1 %bb.1
+
+  bb.1:
+    successors: %bb.4
+
+    JMP_1 %bb.4
+
+  bb.3:
+    successors: %bb.3, %bb.4
+
+    %esi = IMPLICIT_DEF
+    %eflags = IMPLICIT_DEF
+    JB_1 %bb.3, implicit killed %eflags
+    JMP_1 %bb.4
+
+  bb.4:
+
+    RET 0
+...
+#CHECK-LABEL: f0
+
+#CHECK: BB#2 uses : %ESI
+#CHECK: **** Shrink-wrapping results
+#CHECK-NEXT: BB#0: Saves: %ESI, | Restores:
+#CHECK-NEXT: BB#3: Saves: | Restores: %ESI,
Index: test/CodeGen/X86/ShrinkWrapping/LoopNoPreheader.mir
===================================================================
--- /dev/null
+++ test/CodeGen/X86/ShrinkWrapping/LoopNoPreheader.mir
@@ -0,0 +1,57 @@
+# RUN: llc -mtriple=x86_64-- -run-pass=prologepilog %s -debug-only=shrink-wrap2 -o /dev/null 2>&1 | FileCheck %s
+# REQUIRE: asserts
+--- |
+  define void @f0() nounwind {
+  entry:
+    ret void
+  }
+...
+---
+name:            f0
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    successors: %bb.2, %bb.1
+
+    %eflags = IMPLICIT_DEF
+    JE_1 %bb.2, implicit killed %eflags
+    JMP_1 %bb.1
+
+  bb.1:
+    successors: %bb.3
+
+    JMP_1 %bb.3
+
+  bb.2:
+    successors: %bb.3
+
+    JMP_1 %bb.3
+
+  bb.3:
+    successors: %bb.4
+
+    %rbx = IMPLICIT_DEF
+    JMP_1 %bb.4
+
+  bb.4:
+    successors: %bb.3, %bb.5
+
+    %eflags = IMPLICIT_DEF
+    JE_1 %bb.3, implicit killed %eflags
+    JMP_1 %bb.5
+    RET 0
+
+  bb.5:
+
+    RET 0
+
+...
+# Check that we handle loops with no preheader. This should propagate through
+# the loop's predecessors.
+
+#CHECK-LABEL: f0
+
+#CHECK: BB#4 uses : %RBX
+#CHECK: **** Shrink-wrapping results
+#CHECK-NEXT: BB#0: Saves: %RBX, | Restores:
+#CHECK-NEXT: BB#5: Saves: | Restores: %RBX,
Index: test/CodeGen/X86/ShrinkWrapping/LoopNoPreheaderLatchExit.mir
===================================================================
--- /dev/null
+++ test/CodeGen/X86/ShrinkWrapping/LoopNoPreheaderLatchExit.mir
@@ -0,0 +1,53 @@
+# RUN: llc -mtriple=x86_64-- -run-pass=prologepilog %s -debug-only=shrink-wrap2 -o /dev/null 2>&1 | FileCheck %s
+# REQUIRE: asserts
+# XFAIL: *
+--- |
+  define void @f0() nounwind {
+  entry:
+    ret void
+  }
+...
+---
+name:            f0
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    successors: %bb.2, %bb.1
+
+    %eflags = IMPLICIT_DEF
+    JE_1 %bb.2, implicit killed %eflags
+    JMP_1 %bb.1
+
+  bb.1:
+    successors: %bb.3
+
+    JMP_1 %bb.3
+
+  bb.2:
+    successors: %bb.3
+
+    JMP_1 %bb.3
+
+  bb.3:
+    successors: %bb.4
+
+    %rbx = IMPLICIT_DEF
+    JMP_1 %bb.4
+
+  bb.4:
+    successors: %bb.3
+
+    %eflags = IMPLICIT_DEF
+    JE_1 %bb.3, implicit killed %eflags
+    RET 0
+
+...
+# FIXME: ShrinkWrap2: This test still fails, since there is no way to place a
+# restore outside a loop. This should not be possible in real code.
+
+#CHECK-LABEL: f0
+
+#CHECK: BB#3 uses : %RBX
+#CHECK: **** Shrink-wrapping results
+#CHECK-NEXT: BB#0: Saves: %RBX, | Restores:
+#CHECK-NEXT: BB#3: Saves: | Restores: %RBX,
Index: test/CodeGen/X86/ShrinkWrapping/MultipleCriticalEdges.mir
===================================================================
--- /dev/null
+++ test/CodeGen/X86/ShrinkWrapping/MultipleCriticalEdges.mir
@@ -0,0 +1,50 @@
+# RUN: llc -mtriple=x86_64-- -run-pass=prologepilog %s -debug-only=shrink-wrap2 -o /dev/null 2>&1 | FileCheck %s
+# REQUIRE: asserts
+--- |
+  define void @f0() nounwind { ret void }
+
+...
+---
+name:            f0
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    successors: %bb.1, %bb.3
+
+    %eflags = IMPLICIT_DEF
+    JNE_1 %bb.3, implicit killed %eflags
+    JMP_1 %bb.1
+
+  bb.1:
+    successors: %bb.4, %bb.2
+
+    %ebx = IMPLICIT_DEF
+    %eflags = IMPLICIT_DEF
+    JNE_1 %bb.2, implicit killed %eflags
+    JMP_1 %bb.4
+
+  bb.2:
+    successors: %bb.4, %bb.3
+
+    %ebx = IMPLICIT_DEF
+    %eflags = IMPLICIT_DEF
+    JNE_1 %bb.3, implicit killed %eflags
+    JMP_1 %bb.4
+
+  bb.4:
+    RET 0
+
+  bb.3:
+    RET 0
+
+...
+# Check that we handle multiple critical edges.
+
+#CHECK-LABEL: f0
+
+#CHECK: BB#1 uses : %RBX
+#CHECK-NEXT: BB#2 uses : %RBX
+#CHECK: **** Shrink-wrapping results
+#CHECK-NEXT: BB#0: Saves: %RBX, | Restores:
+#CHECK-NEXT: BB#3: Saves: | Restores: %RBX,
+#CHECK-NEXT: BB#4: Saves: | Restores: %RBX,
Index: test/CodeGen/X86/ShrinkWrapping/NestedLoopsCriticalEdges.mir
===================================================================
--- /dev/null
+++ test/CodeGen/X86/ShrinkWrapping/NestedLoopsCriticalEdges.mir
@@ -0,0 +1,64 @@
+# RUN: llc -mtriple=x86_64-- -run-pass=stack-protector -run-pass=prologepilog %s -enable-shrink-wrap2=true -debug-only=shrink-wrap2 -o /dev/null 2>&1 | FileCheck %s
+# REQUIRES: asserts
+# XFAIL: *
+--- |
+  define void @f0() nounwind {
+  entry:
+    ret void
+  }
+...
+---
+name:            f0
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    successors: %bb.1, %bb.6
+
+    %eflags = IMPLICIT_DEF
+    JE_1 %bb.1, implicit killed %eflags
+    JMP_1 %bb.6
+
+  bb.1:
+    successors: %bb.2, %bb.6
+
+    %eflags = IMPLICIT_DEF
+    JE_1 %bb.2, implicit killed %eflags
+    JMP_1 %bb.6
+
+  bb.2:
+    successors: %bb.3
+
+    %rbx = IMPLICIT_DEF
+    JMP_1 %bb.3
+
+  bb.3:
+    successors: %bb.4
+    JMP_1 %bb.4
+
+  bb.4:
+    successors: %bb.4, %bb.5
+
+    %eflags = IMPLICIT_DEF
+    JE_1 %bb.4, implicit killed %eflags
+    JMP_1 %bb.5
+
+  bb.5:
+    successors: %bb.6, %bb.3
+
+    %eflags = IMPLICIT_DEF
+    JE_1 %bb.6, implicit killed %eflags
+    JMP_1 %bb.3
+
+  bb.6:
+    RET 0
+
+...
+# Mix nested loops and critical edges.
+# FIXME: ShrinkWrap2: This fails because we propagate attributes to the
+# critical edges.
+
+#CHECK-LABEL: f0
+
+#CHECK: BB#2 uses : %RBX
+#CHECK: **** Shrink-wrapping results
+#CHECK-NEXT: BB#2: Saves: %RBX, | Restores: %RBX,
Index: test/CodeGen/X86/ShrinkWrapping/NoReturnPath.mir
===================================================================
--- /dev/null
+++ test/CodeGen/X86/ShrinkWrapping/NoReturnPath.mir
@@ -0,0 +1,60 @@
+# RUN: llc -mtriple=x86_64-- -run-pass=stack-protector -run-pass=prologepilog %s -enable-shrink-wrap2=true -debug-only=shrink-wrap2 -o /dev/null 2>&1 | FileCheck %s
+# REQUIRES: asserts
+# This is a reduced test case from test/CodeGen/X86/2009-09-10-SpillComments.ll
+--- |
+  define void @f0() nounwind { ret void }
+...
+---
+name:            f0
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    successors: %bb.6, %bb.1
+
+    %rbx = IMPLICIT_DEF
+    %eflags = IMPLICIT_DEF
+    JNE_1 %bb.6, implicit killed %eflags
+    JMP_1 %bb.1
+
+  bb.1:
+    successors: %bb.2, %bb.3
+    liveins: %rbx
+
+    %eflags = IMPLICIT_DEF
+    JNE_1 %bb.3, implicit killed %eflags
+    JMP_1 %bb.2
+
+  bb.2:
+    RET 0
+
+  bb.3:
+    successors: %bb.4
+    liveins: %rbx
+
+  bb.4:
+    successors: %bb.5, %bb.4
+    liveins: %rbx
+
+    %eflags = IMPLICIT_DEF
+    JNE_1 %bb.4, implicit killed %eflags
+    JMP_1 %bb.5
+
+  bb.5:
+    successors: %bb.4
+    liveins: %rbx
+
+    %rbx = IMPLICIT_DEF
+    JMP_1 %bb.4
+
+  bb.6:
+    RET 0
+...
+#CHECK-LABEL: f0
+
+#CHECK: BB#0 uses : %RBX
+#CHECK-NEXT: BB#5 uses : %RBX
+#CHECK-NEXT: Remove uses from no-return BB#3
+#CHECK-NEXT: Remove uses from no-return BB#4
+#CHECK-NEXT: Remove uses from no-return BB#5
+#CHECK: **** Shrink-wrapping results
+#CHECK-NEXT: BB#0: Saves: %RBX, | Restores: %RBX,
Index: test/CodeGen/X86/ShrinkWrapping/Paper1Figure2CriticalEdge.mir
===================================================================
--- /dev/null
+++ test/CodeGen/X86/ShrinkWrapping/Paper1Figure2CriticalEdge.mir
@@ -0,0 +1,47 @@
+# RUN: llc -mtriple=x86_64-- -run-pass=prologepilog %s -debug-only=shrink-wrap2 -o /dev/null 2>&1 | FileCheck %s
+# REQUIRES: asserts
+--- |
+  define void @f0() nounwind { ret void }
+...
+---
+name:            f0
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    successors: %bb.1, %bb.2
+
+    %eflags = IMPLICIT_DEF
+    JNE_1 %bb.2, implicit killed %eflags
+    JMP_1 %bb.1
+
+  bb.1:
+    successors: %bb.3, %bb.4
+
+    %eflags = IMPLICIT_DEF
+    JE_1 %bb.4, implicit killed %eflags
+    JMP_1 %bb.3
+
+  bb.2:
+    successors: %bb.4
+
+    %ebx = IMPLICIT_DEF
+    JMP_1 %bb.4
+
+  bb.3:
+    RET 0
+
+  bb.4:
+
+    %ebx = IMPLICIT_DEF
+    RET 0
+...
+# Fig. 2 in Chow's paper.
+
+#CHECK-LABEL: f0
+
+#CHECK: BB#2 uses : %RBX
+#CHECK-NEXT: BB#4 uses : %RBX
+#CHECK: **** Shrink-wrapping results
+#CHECK-NEXT: BB#0: Saves: %RBX, | Restores:
+#CHECK-NEXT: BB#3: Saves: | Restores: %RBX,
+#CHECK-NEXT: BB#4: Saves: | Restores: %RBX,
Index: test/CodeGen/X86/ShrinkWrapping/Paper2Figure1.mir
===================================================================
--- /dev/null
+++ test/CodeGen/X86/ShrinkWrapping/Paper2Figure1.mir
@@ -0,0 +1,56 @@
+# RUN: llc -mtriple=x86_64-- -run-pass=prologepilog %s -debug-only=shrink-wrap2 -o /dev/null 2>&1 | FileCheck %s
+# REQUIRES: asserts
+--- |
+  define void @f0() nounwind { ret void }
+...
+---
+name:            f0
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    successors: %bb.1, %bb.2
+
+    %eflags = IMPLICIT_DEF
+    JNE_1 %bb.2, implicit killed %eflags
+    JMP_1 %bb.1
+
+  bb.1:
+    successors: %bb.3
+    JMP_1 %bb.3
+
+  bb.2:
+    successors: %bb.3
+
+    %ebx = IMPLICIT_DEF
+    JMP_1 %bb.3
+
+  bb.3:
+    successors: %bb.5, %bb.4
+
+    %eflags = IMPLICIT_DEF
+    JE_1 %bb.5, implicit killed %eflags
+    JMP_1 %bb.4
+
+  bb.4:
+    successors: %bb.6
+
+    %ebx = IMPLICIT_DEF
+    JMP_1 %bb.6
+
+  bb.5:
+    successors: %bb.6
+
+    JMP_1 %bb.6
+
+  bb.6:
+    RET 0
+...
+# Fig 1 in Lupo and Wilken's paper.
+
+#CHECK-LABEL: f0
+
+#CHECK: BB#2 uses : %RBX
+#CHECK-NEXT: BB#4 uses : %RBX
+#CHECK: **** Shrink-wrapping results
+#CHECK-NEXT: BB#2: Saves: %RBX, | Restores: %RBX,
+#CHECK-NEXT: BB#4: Saves: %RBX, | Restores: %RBX,
Index: test/CodeGen/X86/ShrinkWrapping/Paper2Figure2.mir
===================================================================
--- /dev/null
+++ test/CodeGen/X86/ShrinkWrapping/Paper2Figure2.mir
@@ -0,0 +1,120 @@
+# RUN: llc -mtriple=x86_64-- -run-pass=prologepilog %s -debug-only=shrink-wrap2 -o /dev/null 2>&1 | FileCheck %s
+# REQUIRES: asserts
+--- |
+  define void @f0() nounwind { ret void }
+...
+---
+name:            f0
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    successors: %bb.1, %bb.8
+
+    %eflags = IMPLICIT_DEF
+    JNE_1 %bb.8, implicit killed %eflags
+    JMP_1 %bb.1
+
+  bb.1:
+    successors: %bb.2, %bb.7
+
+    %eflags = IMPLICIT_DEF
+    JNE_1 %bb.7, implicit killed %eflags
+    JMP_1 %bb.2
+
+  bb.2:
+    successors: %bb.3, %bb.5
+
+    %eflags = IMPLICIT_DEF
+    JNE_1 %bb.5, implicit killed %eflags
+    JMP_1 %bb.3
+
+  bb.3:
+    successors: %bb.4, %bb.5
+
+    %ebx = IMPLICIT_DEF
+    %eflags = IMPLICIT_DEF
+    JNE_1 %bb.5, implicit killed %eflags
+    JMP_1 %bb.4
+
+  bb.4:
+    successors: %bb.5
+
+    %ebx = MOV32ri 9
+    JMP_1 %bb.5
+
+  bb.5:
+    successors: %bb.6, %bb.7
+
+    %eflags = IMPLICIT_DEF
+    JNE_1 %bb.7, implicit killed %eflags
+    JMP_1 %bb.6
+
+  bb.6:
+    successors: %bb.7
+
+    %ebx = IMPLICIT_DEF
+    JMP_1 %bb.7
+
+  bb.7:
+    successors: %bb.15
+
+    JMP_1 %bb.15
+
+  bb.8:
+    successors: %bb.9, %bb.10
+
+    %eflags = IMPLICIT_DEF
+    JNE_1 %bb.10, implicit killed %eflags
+    JMP_1 %bb.9
+
+  bb.9:
+    successors: %bb.11
+
+    JMP_1 %bb.11
+
+  bb.10:
+    successors: %bb.11
+
+    %ebx = IMPLICIT_DEF
+    JMP_1 %bb.11
+
+  bb.11:
+    successors: %bb.12, %bb.13
+
+    %eflags = IMPLICIT_DEF
+    JNE_1 %bb.13, implicit killed %eflags
+    JMP_1 %bb.12
+
+  bb.12:
+    successors: %bb.14
+
+    JMP_1 %bb.14
+
+  bb.13:
+    successors: %bb.14
+
+    %ebx = IMPLICIT_DEF
+    JMP_1 %bb.14
+
+  bb.14:
+    successors: %bb.15
+    JMP_1 %bb.15
+
+
+  bb.15:
+    RET 0
+...
+# Fig 2 in Lupo and Wilken's paper.
+
+#CHECK-LABEL: f0
+
+#CHECK: BB#3 uses : %RBX
+#CHECK-NEXT: BB#4 uses : %RBX
+#CHECK-NEXT: BB#6 uses : %RBX
+#CHECK-NEXT: BB#10 uses : %RBX
+#CHECK-NEXT: BB#13 uses : %RBX
+#CHECK: **** Shrink-wrapping results
+#CHECK-NEXT: BB#1: Saves: %RBX, | Restores:
+#CHECK-NEXT: BB#7: Saves: | Restores: %RBX,
+#CHECK-NEXT: BB#10: Saves: %RBX, | Restores: %RBX,
+#CHECK-NEXT: BB#13: Saves: %RBX, | Restores: %RBX,
Index: test/CodeGen/X86/ShrinkWrapping/PropagateLoopUses.mir
===================================================================
--- /dev/null
+++ test/CodeGen/X86/ShrinkWrapping/PropagateLoopUses.mir
@@ -0,0 +1,113 @@
+# RUN: llc -mtriple=x86_64-- -run-pass=prologepilog %s -debug-only=shrink-wrap2 -o /dev/null 2>&1 | FileCheck %s
+# REQUIRES: asserts
+--- |
+  define void @f0() nounwind { ret void }
+...
+---
+name:            f0
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    successors: %bb.1, %bb.2
+
+    %eflags = IMPLICIT_DEF
+    JNE_1 %bb.2, implicit killed %eflags
+    JMP_1 %bb.1
+
+  bb.1:
+    successors: %bb.15
+
+    JMP_1 %bb.15
+
+  bb.2:
+    successors: %bb.11
+
+    %r15 = IMPLICIT_DEF
+    %r14 = IMPLICIT_DEF
+    %rbx = IMPLICIT_DEF
+    JMP_1 %bb.11
+
+  bb.3:
+    successors: %bb.4, %bb.3
+
+    %eflags = IMPLICIT_DEF
+    JNE_1 %bb.3, implicit killed %eflags
+    JMP_1 %bb.4
+
+  bb.4:
+    successors: %bb.6
+    liveins: %r14
+
+    %r14 = IMPLICIT_DEF
+    JMP_1 %bb.6
+
+  bb.5:
+    successors: %bb.6
+
+    JMP_1 %bb.6
+
+  bb.6:
+    successors: %bb.7
+
+    JMP_1 %bb.7
+
+  bb.7:
+    successors: %bb.8, %bb.9
+
+    %eflags = IMPLICIT_DEF
+    JA_1 %bb.8, implicit killed %eflags
+    JMP_1 %bb.9
+
+  bb.8:
+    successors: %bb.5, %bb.7
+
+    %eflags = IMPLICIT_DEF
+    JE_1 %bb.5, implicit killed %eflags
+    JMP_1 %bb.7
+
+  bb.9:
+    successors: %bb.10, %bb.7
+    liveins: %rbx
+
+    %eflags = IMPLICIT_DEF
+    JE_1 %bb.7, implicit killed %eflags
+    JMP_1 %bb.10
+
+  bb.10:
+    successors: %bb.11
+
+
+  bb.11:
+    successors: %bb.12, %bb.3
+
+    %eflags = IMPLICIT_DEF
+    JE_1 %bb.12, implicit killed %eflags
+    JMP_1 %bb.3
+
+  bb.12:
+    successors: %bb.13, %bb.14
+
+    %eflags = IMPLICIT_DEF
+    JE_1 %bb.14, implicit killed %eflags
+
+  bb.13:
+    successors: %bb.15
+
+    JMP_1 %bb.15
+
+  bb.14:
+    RET 0
+
+  bb.15:
+    RET 0
+...
+# Check that we propagate the loop uses to its predecessors and successors.
+
+#CHECK-LABEL: f0
+
+#CHECK: BB#2 uses : %RBX, %R14, %R15
+#CHECK-NEXT: BB#10 uses : %R14
+#CHECK: **** Shrink-wrapping results
+#CHECK-NEXT: BB#2: Saves: %RBX, %R14, %R15, | Restores: %RBX, %R15
+#CHECK-NEXT: BB#13: Saves: | Restores: %R14
+#CHECK-NEXT: BB#14: Saves: | Restores: %R14
Index: test/CodeGen/X86/ShrinkWrapping/SCCCriticalEdge.mir
===================================================================
--- /dev/null
+++ test/CodeGen/X86/ShrinkWrapping/SCCCriticalEdge.mir
@@ -0,0 +1,48 @@
+# RUN: llc -mtriple=x86_64-- -run-pass=prologepilog %s -debug-only=shrink-wrap2 -o /dev/null 2>&1 | FileCheck %s
+# REQUIRE: asserts
+# XFAIL: *
+--- |
+  define void @f0() nounwind { ret void }
+...
+---
+name:            f0
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    successors: %bb.1, %bb.5
+
+    %eflags = IMPLICIT_DEF
+    JE_1 %bb.1, implicit killed %eflags
+    JMP_1 %bb.5
+
+  bb.1:
+    successors: %bb.2
+
+    %rbx = IMPLICIT_DEF
+    JMP_1 %bb.2
+
+  bb.2:
+    successors: %bb.3
+
+    JMP_1 %bb.3
+
+  bb.3:
+    successors: %bb.3, %bb.5
+
+    %eflags = IMPLICIT_DEF
+    JE_1 %bb.3, implicit killed %eflags
+    JMP_1 %bb.5
+
+  bb.5:
+    RET 0
+
+...
+# FIXME: ShrinkWrap2: This still fails because we propagate attributes where we
+# could not do it.
+
+#CHECK-LABEL: f0
+
+#CHECK: BB#1 uses : %RBX
+#CHECK: **** Shrink-wrapping results
+#CHECK-NEXT: BB#1: Saves: %RBX, | Restores:
+#CHECK-NEXT: BB#2: Saves: | Restores: %RBX,
Index: test/CodeGen/X86/ShrinkWrapping/SaveBeforeLoop.mir
===================================================================
--- /dev/null
+++ test/CodeGen/X86/ShrinkWrapping/SaveBeforeLoop.mir
@@ -0,0 +1,58 @@
+# RUN: llc -mtriple=x86_64-- -run-pass=prologepilog %s -debug-only=shrink-wrap2 -o /dev/null 2>&1 | FileCheck %s
+# REQUIRE: asserts
+# XFAIL: x86
+--- |
+  define void @f0() nounwind { ret void }
+...
+---
+name:            f0
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    successors: %bb.1, %bb.4
+
+    %eflags = IMPLICIT_DEF
+    JE_1 %bb.1, implicit killed %eflags
+    JMP_1 %bb.4
+
+  bb.1:
+    successors: %bb.2, %bb.5
+
+    %eflags = IMPLICIT_DEF
+    JE_1 %bb.2, implicit killed %eflags
+    JMP_1 %bb.5
+
+  bb.2:
+    successors: %bb.3
+
+    %rbx = IMPLICIT_DEF
+    JMP_1 %bb.3
+
+  bb.3:
+    successors: %bb.3, %bb.5
+
+    %eflags = IMPLICIT_DEF
+    JE_1 %bb.3, implicit killed %eflags
+    JMP_1 %bb.5
+
+  bb.5:
+    successors: %bb.5, %bb.6
+
+    %eflags = IMPLICIT_DEF
+    JE_1 %bb.5, implicit killed %eflags
+    JMP_1 %bb.6
+
+  bb.4:
+    RET 0
+
+  bb.6:
+    RET 0
+...
+# FIXME: ShrinkWrap2: This fails because we propagate attributes where we could
+# avoid doing it.
+
+#CHECK-LABEL: f0
+
+#CHECK: BB#2 uses : %RBX
+#CHECK: **** Shrink-wrapping results
+#CHECK-NEXT: BB#2: Saves: %RBX, | Restores: %RBX
Index: test/CodeGen/X86/ShrinkWrapping/SimpleLoopBranch.mir
===================================================================
--- /dev/null
+++ test/CodeGen/X86/ShrinkWrapping/SimpleLoopBranch.mir
@@ -0,0 +1,40 @@
+# RUN: llc -mtriple=x86_64-- -run-pass=prologepilog %s -debug-only=shrink-wrap2 -o /dev/null 2>&1 | FileCheck %s
+# REQUIRES: asserts
+--- |
+  define void @f0() nounwind { ret void }
+...
+---
+name:            f0
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    successors: %bb.3, %bb.2
+
+    %eflags = IMPLICIT_DEF
+    JNE_1 %bb.3, implicit killed %eflags
+    JMP_1 %bb.2
+
+  bb.1:
+    successors: %bb.3, %bb.2
+
+    %rbx = IMPLICIT_DEF
+    %eflags = IMPLICIT_DEF
+    JNE_1 %bb.3, implicit killed %eflags
+    JMP_1 %bb.2
+
+  bb.2:
+    successors: %bb.1
+
+    JMP_1 %bb.1
+
+  bb.3:
+    RET 0
+...
+# Check that we don't save inside loops.
+
+#CHECK-LABEL: f0
+
+#CHECK: BB#1 uses : %RBX
+#CHECK: **** Shrink-wrapping results
+#CHECK-NEXT: BB#0: Saves: %RBX, | Restores:
+#CHECK-NEXT: BB#3: Saves: | Restores: %RBX,
Index: test/CodeGen/X86/ShrinkWrapping/StackAlignment.mir
===================================================================
--- /dev/null
+++ test/CodeGen/X86/ShrinkWrapping/StackAlignment.mir
@@ -0,0 +1,37 @@
+# RUN: llc -disable-fp-elim -mtriple=x86_64-- -run-pass=prologepilog %s -o - | FileCheck %s
+# REQUIRE: asserts
+--- |
+  define void @f0() nounwind { ret void }
+...
+---
+name:            f0
+tracksRegLiveness: true
+stack:
+  - { id: 0, offset: 0, size: 8, alignment: 8 }
+body:             |
+  bb.0:
+    successors: %bb.2, %bb.1
+
+    %eflags = IMPLICIT_DEF
+    JE_1 %bb.2, implicit killed %eflags
+    JMP_1 %bb.1
+
+  bb.1:
+    successors: %bb.3
+
+    %rbx = IMPLICIT_DEF
+    %r14 = IMPLICIT_DEF
+    JMP_1 %bb.3
+
+  bb.2:
+    RET 0
+
+  bb.3:
+    liveins: %rbx
+
+    %rax = MOV64rm %stack.0, %rbx, _, 0, _
+    RET 0, %rax
+...
+# Check that we do the stack adjustments instead of pushes.
+#CHECK-LABEL: f0
+#CHECK: %rsp = frame-setup SUB64ri8 %rsp, 16
Index: test/CodeGen/X86/ShrinkWrapping/Tree.mir
===================================================================
--- /dev/null
+++ test/CodeGen/X86/ShrinkWrapping/Tree.mir
@@ -0,0 +1,57 @@
+# RUN: llc -mtriple=x86_64-- -run-pass=prologepilog %s -debug-only=shrink-wrap2 -o /dev/null 2>&1 | FileCheck %s
+# REQUIRES: asserts
+--- |
+  define void @f0() nounwind { ret void }
+...
+---
+name:            f0
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    successors: %bb.1, %bb.4
+
+    %eflags = IMPLICIT_DEF
+    JNE_1 %bb.4, implicit killed %eflags
+    JMP_1 %bb.1
+
+  bb.1:
+    successors: %bb.2, %bb.3
+
+    %eflags = IMPLICIT_DEF
+    JNE_1 %bb.3, implicit killed %eflags
+    JMP_1 %bb.2
+
+  bb.2:
+    %ebx = IMPLICIT_DEF
+    RET 0
+
+  bb.3:
+    %ebx = IMPLICIT_DEF
+    RET 0
+
+  bb.4:
+    successors: %bb.5, %bb.6
+
+    %eflags = IMPLICIT_DEF
+    JNE_1 %bb.6, implicit killed %eflags
+    JMP_1 %bb.5
+
+  bb.5:
+    %ebx = IMPLICIT_DEF
+    RET 0
+
+  bb.6:
+    RET 0
+...
+# Check that we save only on branches we need in a tree-like CFG.
+
+#CHECK-LABEL: f0
+
+#CHECK: BB#2 uses : %RBX
+#CHECK-NEXT: BB#3 uses : %RBX
+#CHECK-NEXT: BB#5 uses : %RBX
+#CHECK: **** Shrink-wrapping results
+#CHECK-NEXT: BB#1: Saves: %RBX, | Restores:
+#CHECK-NEXT: BB#2: Saves: | Restores: %RBX,
+#CHECK-NEXT: BB#3: Saves: | Restores: %RBX,
+#CHECK-NEXT: BB#5: Saves: %RBX, | Restores: %RBX,
Index: test/CodeGen/X86/ShrinkWrapping/lit.local.cfg
===================================================================
--- /dev/null
+++ test/CodeGen/X86/ShrinkWrapping/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'X86' in config.root.targets:
+    config.unsupported = True
Index: test/CodeGen/X86/ShrinkWrapping/optimize-max-0.mir
===================================================================
--- /dev/null
+++ test/CodeGen/X86/ShrinkWrapping/optimize-max-0.mir
@@ -0,0 +1,47 @@
+# RUN: llc -mtriple=x86_64-- -run-pass=stack-protector -run-pass=prologepilog %s -enable-shrink-wrap2=true -debug-only=shrink-wrap2 -o /dev/null 2>&1 | FileCheck %s
+# REQUIRES: asserts
+# XFAIL: x86
+--- |
+  define void @f0() nounwind { ret void }
+...
+---
+name:            f0
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    successors: %bb.6, %bb.3
+
+    %eflags = IMPLICIT_DEF
+    JE_1 %bb.6, implicit killed %eflags
+    JMP_1 %bb.3
+
+  bb.3:
+    successors: %bb.3, %bb.4
+
+    %eflags = IMPLICIT_DEF
+    JNE_1 %bb.3, implicit killed %eflags
+    JMP_1 %bb.4
+
+  bb.4:
+    successors: %bb.6
+
+    JMP_1 %bb.6
+
+  bb.6:
+    successors: %bb.8
+
+    %ebx = IMPLICIT_DEF
+    JMP_1 %bb.8
+
+  bb.8:
+    RET 0
+
+...
+# FIXME: ShrinkWrap2: This fails because we detect a critical edge.
+
+#CHECK-LABEL: f0
+
+#CHECK: BB#3 uses : %RBX
+#CHECK: **** Shrink-wrapping results
+#CHECK-NEXT: BB#3: Saves: %RBX, | Restores:
+#CHECK-NEXT: BB#4: Saves: | Restores: %RBX,