diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -275,6 +275,15 @@
     return isLoadFromStackSlot(MI, FrameIndex);
   }
 
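+  /// Variant of isLoadFromStackSlot that returns the MachineOperand of the
+  /// loaded register instead of its register number. \returns nullptr if
+  /// \p MI is not a load from a stack slot.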
+  virtual const MachineOperand *isLoadFromStackSlotMO(const MachineInstr &MI,
+                                                      int &FrameIndex) const {
+    llvm_unreachable(
+        "Target didn't implement TargetInstrInfo::isLoadFromStackSlotMO!");
+  }
+
   /// Check for post-frame ptr elimination stack locations as well.
   /// This uses a heuristic so it isn't reliable for correctness.
   virtual unsigned isLoadFromStackSlotPostFE(const MachineInstr &MI,
@@ -302,6 +311,15 @@
     return 0;
   }
 
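+  /// Variant of isStoreToStackSlot that returns the MachineOperand of the
+  /// stored register instead of its register number. \returns nullptr if
+  /// \p MI is not a store to a stack slot.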
+  virtual const MachineOperand *isStoreToStackSlotMO(const MachineInstr &MI,
+                                                     int &FrameIndex) const {
+    llvm_unreachable(
+        "Target didn't implement TargetInstrInfo::isStoreToStackSlotMO!");
+  }
+
   /// Optional extension of isStoreToStackSlot that returns the number of
   /// bytes stored to the stack. This must be implemented if a backend
   /// supports partial stack slot spills/loads to further disambiguate
@@ -1996,6 +2014,19 @@
     return MI.getOperand(0);
   }
 
+  /// \returns true if a spill/reload of size \p MemBits with instruction
+  /// opcode \p Opcode can be handled by Spill2Reg.
+  virtual bool isLegalToSpill2Reg(unsigned MemBits, unsigned Opcode) const {
+    llvm_unreachable(
+        "Target didn't implement TargetInstrInfo::isLegalToSpill2Reg!");
+  }
+
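+  /// \returns true if the target subtarget \p STI supports the vector
+  /// instructions required by Spill2Reg. Defaults to false.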
+  virtual bool targetSupportsSpill2Reg(const TargetSubtargetInfo *STI) const {
+    return false;
+  }
+
 private:
   mutable std::unique_ptr<MIRFormatter> Formatter;
   unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode;
diff --git a/llvm/lib/CodeGen/Spill2Reg.cpp b/llvm/lib/CodeGen/Spill2Reg.cpp
--- a/llvm/lib/CodeGen/Spill2Reg.cpp
+++ b/llvm/lib/CodeGen/Spill2Reg.cpp
@@ -15,10 +15,15 @@
 ///
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
 
 using namespace llvm;
 
@@ -33,6 +38,61 @@
   void getAnalysisUsage(AnalysisUsage &AU) const override;
   void releaseMemory() override;
   bool runOnMachineFunction(MachineFunction &) override;
+
+private:
+  /// Holds data for spills and reloads.
+  struct StackSlotDataEntry {
+    /// This is set to true to disable code generation for the spills/reloads
+    /// that we collected in this entry.
+    bool Disable = false;
+    /// Indentation for the dump() methods.
+    static constexpr const int DumpInd = 2;
+
+    /// The data held for each spill/reload.
+    struct MIData {
+      MIData(MachineInstr *MI, const MachineOperand *MO, unsigned SpillBits)
+          : MI(MI), MO(MO), SpillBits(SpillBits) {}
+      /// The Spill/Reload instruction.
+      MachineInstr *MI = nullptr;
+      /// The operand being spilled/reloaded.
+      const MachineOperand *MO = nullptr;
+      /// The size of the data spilled/reloaded in bits. This occasionally
+      /// differs across accesses to the same stack slot.
+      unsigned SpillBits = 0;
+#ifndef NDEBUG
+      LLVM_DUMP_METHOD void dump() const;
+#endif
+    };
+    SmallVector<MIData, 1> Spills;
+    SmallVector<MIData, 1> Reloads;
+
+    /// \returns the register class of the register spilled/reloaded.
+    const TargetRegisterClass *getRegClass(MachineRegisterInfo *MRI) const {
+      Register SpillReg = Spills.front().MO->getReg();
+      return MRI->getRegClass(SpillReg);
+    }
+#ifndef NDEBUG
+    LLVM_DUMP_METHOD void dump() const;
+#endif
+  };
+  /// Look for spill2reg candidates, i.e., spills and reloads in places with
+  /// high memory unit contention. Fills in StackSlotData.
+  void collectSpillsAndReloads();
+  /// Replace spills to stack with spills to registers (same for reloads).
+  void generateCode();
+  /// Cleanup data structures once the pass is finished.
+  void cleanup();
+  /// The main entry point for this pass.
+  bool run();
+
+  /// Map from a stack slot to the corresponding spills and reloads.
+  DenseMap<int, StackSlotDataEntry> StackSlotData;
+
+  MachineFunction *MF = nullptr;
+  MachineRegisterInfo *MRI = nullptr;
+  MachineFrameInfo *MFI = nullptr;
+  const TargetInstrInfo *TII = nullptr;
+  const TargetRegisterInfo *TRI = nullptr;
 };
 
 } // namespace
@@ -45,12 +105,113 @@
 void Spill2Reg::releaseMemory() {}
 
 bool Spill2Reg::runOnMachineFunction(MachineFunction &MFn) {
-  llvm_unreachable("Unimplemented");
+  // Disable if NoImplicitFloat to avoid emitting instrs that use vectors.
+  if (MFn.getFunction().hasFnAttribute(Attribute::NoImplicitFloat))
+    return false;
+
+  MF = &MFn;
+  MRI = &MF->getRegInfo();
+  MFI = &MF->getFrameInfo();
+  TII = MF->getSubtarget().getInstrInfo();
+  TRI = MF->getSubtarget().getRegisterInfo();
+  // Enable only if the target supports the appropriate vector instruction set.
+  if (!TII->targetSupportsSpill2Reg(&MF->getSubtarget()))
+    return false;
+
+  return run();
 }
 
 char Spill2Reg::ID = 0;
 
 char &llvm::Spill2RegID = Spill2Reg::ID;
 
+void Spill2Reg::collectSpillsAndReloads() {
+  // The checks for collecting spills and reloads are identical, so we keep
+  // them here in one place. Returns true if the entry should be skipped.
+  auto SkipEntry = [this](int StackSlot, unsigned SpillBits,
+                          unsigned Opcode) -> bool {
+    // If not a spill/reload stack slot.
+    if (!MFI->isSpillSlotObjectIndex(StackSlot))
+      return true;
+    // Check size in bits.
+    if (!TII->isLegalToSpill2Reg(SpillBits, Opcode))
+      return true;
+    return false;
+  };
+
+  // Collect spills and reloads and associate them to stack slots.
+  // If any spill/reload for a stack slot is found not to be eligible for
+  // spill-to-reg, then that stack slot is disabled.
+  for (MachineBasicBlock &MBB : *MF) {
+    for (MachineInstr &MI : MBB) {
+      int StackSlot;
+      if (const MachineOperand *MO = TII->isStoreToStackSlotMO(MI, StackSlot)) {
+        MachineInstr *Spill = &MI;
+        auto &Entry = StackSlotData[StackSlot];
+        unsigned SpillBits = TRI->getRegSizeInBits(MO->getReg(), *MRI);
+        if (SkipEntry(StackSlot, SpillBits, MI.getOpcode())) {
+          Entry.Disable = true;
+          continue;
+        }
+        Entry.Spills.emplace_back(Spill, MO, SpillBits);
+      } else if (const MachineOperand *MO =
+                     TII->isLoadFromStackSlotMO(MI, StackSlot)) {
+        MachineInstr *Reload = &MI;
+        auto &Entry = StackSlotData[StackSlot];
+        unsigned SpillBits = TRI->getRegSizeInBits(MO->getReg(), *MRI);
+        if (SkipEntry(StackSlot, SpillBits, MI.getOpcode())) {
+          Entry.Disable = true;
+          continue;
+        }
+        assert(Reload->getRestoreSize(TII) && "Expected reload");
+        Entry.Reloads.emplace_back(Reload, MO, SpillBits);
+      } else {
+        // This should capture uses of the stack in instructions that access
+        // memory (e.g., folded spills/reloads) and non-memory instructions,
+        // like x86 LEA.
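+        // e.g., %1:gr64 = LEA64r %stack.0, 1, $noreg, 0, $noreg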
+        for (const MachineOperand &MO : MI.operands())
+          if (MO.isFI()) {
+            int StackSlot = MO.getIndex();
+            auto &Entry = StackSlotData[StackSlot];
+            Entry.Disable = true;
+          }
+      }
+    }
+  }
+}
+
+void Spill2Reg::generateCode() { llvm_unreachable("Unimplemented"); }
+
+void Spill2Reg::cleanup() { StackSlotData.clear(); }
+
+bool Spill2Reg::run() {
+  // Walk over each instruction in the code, keeping track of the processor's
+  // port pressure, and look for memory unit hot-spots.
+  collectSpillsAndReloads();
+
+  // Replace spills/reloads to stack slots with spills/reloads to registers.
+  generateCode();
+
+  cleanup();
+  return true;
+}
+
+#ifndef NDEBUG
+void Spill2Reg::StackSlotDataEntry::MIData::dump() const {
+  dbgs() << "  (" << *MO << ") " << *MI;
+}
+
+void Spill2Reg::StackSlotDataEntry::dump() const {
+  dbgs().indent(DumpInd) << "Disable: " << Disable << "\n";
+  dbgs().indent(DumpInd) << "Spills:\n";
+  for (const MIData &Data : Spills)
+    Data.dump();
+  dbgs().indent(DumpInd) << "Reloads:\n";
+  for (const MIData &Data : Reloads)
+    Data.dump();
+}
+#endif
+
 INITIALIZE_PASS_BEGIN(Spill2Reg, "spill2reg", "Spill2Reg", false, false)
 INITIALIZE_PASS_END(Spill2Reg, "spill2reg", "Spill2Reg", false, false)
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -212,6 +212,16 @@
 
   unsigned isLoadFromStackSlot(const MachineInstr &MI,
                                int &FrameIndex) const override;
+
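+  /// Like isLoadFromStackSlot, but returns the MachineOperand of the loaded
+  /// register (or nullptr) and sets \p MemBytes to the access size in bytes.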
+  const MachineOperand *isLoadFromStackSlotMO(const MachineInstr &MI,
+                                              int &FrameIndex,
+                                              unsigned &MemBytes) const;
+
+  const MachineOperand *isLoadFromStackSlotMO(const MachineInstr &MI,
+                                              int &FrameIndex) const override;
+
   unsigned isLoadFromStackSlot(const MachineInstr &MI,
                                int &FrameIndex,
                                unsigned &MemBytes) const override;
@@ -223,6 +233,16 @@
 
   unsigned isStoreToStackSlot(const MachineInstr &MI,
                               int &FrameIndex) const override;
+
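+  /// Like isStoreToStackSlot, but returns the MachineOperand of the stored
+  /// register (or nullptr) and sets \p MemBytes to the access size in bytes.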
+  const MachineOperand *isStoreToStackSlotMO(const MachineInstr &MI,
+                                             int &FrameIndex,
+                                             unsigned &MemBytes) const;
+
+  const MachineOperand *isStoreToStackSlotMO(const MachineInstr &MI,
+                                             int &FrameIndex) const override;
+
   unsigned isStoreToStackSlot(const MachineInstr &MI,
                               int &FrameIndex,
                               unsigned &MemBytes) const override;
@@ -645,6 +665,10 @@
                             Register SrcReg2, int64_t ImmMask, int64_t ImmValue,
                             const MachineInstr &OI, bool *IsSwapped,
                             int64_t *ImmDelta) const;
+
+  bool isLegalToSpill2Reg(unsigned MemBits, unsigned Opcode) const override;
+
+  bool targetSupportsSpill2Reg(const TargetSubtargetInfo *STI) const override;
 };
 
 } // namespace llvm
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -887,13 +887,27 @@
   return X86InstrInfo::isLoadFromStackSlot(MI, FrameIndex, Dummy);
 }
 
+const MachineOperand *
+X86InstrInfo::isLoadFromStackSlotMO(const MachineInstr &MI, int &FrameIndex,
+                                    unsigned &MemBytes) const {
+  if (isFrameLoadOpcode(MI.getOpcode(), MemBytes))
+    if (MI.getOperand(0).getSubReg() == 0 && isFrameOperand(MI, 1, FrameIndex))
+      return &MI.getOperand(0);
+  return nullptr;
+}
+
+const MachineOperand *
+X86InstrInfo::isLoadFromStackSlotMO(const MachineInstr &MI,
+                                    int &FrameIndex) const {
+  unsigned UnusedMemBytes;
+  return isLoadFromStackSlotMO(MI, FrameIndex, UnusedMemBytes);
+}
+
 unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                            int &FrameIndex,
                                            unsigned &MemBytes) const {
-  if (isFrameLoadOpcode(MI.getOpcode(), MemBytes))
-    if (MI.getOperand(0).getSubReg() == 0 && isFrameOperand(MI, 1, FrameIndex))
-      return MI.getOperand(0).getReg();
-  return 0;
+  const MachineOperand *MO = isLoadFromStackSlotMO(MI, FrameIndex, MemBytes);
+  return MO != nullptr ? (unsigned)MO->getReg() : 0;
 }
 
 unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
@@ -921,14 +935,28 @@
   return X86InstrInfo::isStoreToStackSlot(MI, FrameIndex, Dummy);
 }
 
-unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
-                                          int &FrameIndex,
-                                          unsigned &MemBytes) const {
+const MachineOperand *
+X86InstrInfo::isStoreToStackSlotMO(const MachineInstr &MI, int &FrameIndex,
+                                   unsigned &MemBytes) const {
   if (isFrameStoreOpcode(MI.getOpcode(), MemBytes))
     if (MI.getOperand(X86::AddrNumOperands).getSubReg() == 0 &&
         isFrameOperand(MI, 0, FrameIndex))
-      return MI.getOperand(X86::AddrNumOperands).getReg();
-  return 0;
+      return &MI.getOperand(X86::AddrNumOperands);
+  return nullptr;
+}
+
+const MachineOperand *
+X86InstrInfo::isStoreToStackSlotMO(const MachineInstr &MI,
+                                   int &FrameIndex) const {
+  unsigned UnusedMemBytes;
+  return isStoreToStackSlotMO(MI, FrameIndex, UnusedMemBytes);
+}
+
+unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
+                                          int &FrameIndex,
+                                          unsigned &MemBytes) const {
+  const MachineOperand *MO = isStoreToStackSlotMO(MI, FrameIndex, MemBytes);
+  return MO != nullptr ? (unsigned)MO->getReg() : 0;
 }
 
 unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
@@ -9457,5 +9485,35 @@
   return It;
 }
 
+bool X86InstrInfo::isLegalToSpill2Reg(unsigned MemBits, unsigned Opcode) const {
+  switch (Opcode) {
+  // Skip mask-register moves like `$k1 = KMOVWkm %stack.1`: replacing the
+  // stack slot with a vector register yields an illegal `movq %k1, %xmm0`.
+  case X86::KMOVBkm:
+  case X86::KMOVBmk:
+  case X86::KMOVWkm:
+  case X86::KMOVWmk:
+  case X86::KMOVDkm:
+  case X86::KMOVDmk:
+  case X86::KMOVQkm:
+  case X86::KMOVQmk:
+    return false;
+  }
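+  // Spill2Reg keeps spilled values in vector registers, so only sizes that
+  // fit a direct GPR<->vector move (e.g., MOVD/MOVQ) are supported.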
+  switch (MemBits) {
+  case 64:
+  case 32:
+    return true;
+  }
+  return false;
+}
+
+bool X86InstrInfo::targetSupportsSpill2Reg(
+    const TargetSubtargetInfo *STI) const {
+  const X86Subtarget *X86STI = static_cast<const X86Subtarget *>(STI);
+  return X86STI->hasSSE41();
+}
+
 #define GET_INSTRINFO_HELPERS
 #include "X86GenInstrInfo.inc"