diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -2018,7 +2018,8 @@ } virtual const TargetRegisterClass * - getVectorRegisterClassForSpill2Reg(const TargetRegisterInfo *TRI) const { + getVectorRegisterClassForSpill2Reg(const TargetRegisterInfo *TRI, + const TargetSubtargetInfo *STI) const { llvm_unreachable( "Target didn't implement " "TargetInstrInfo::createVirtualVectorRegisterForSpillToReg()"); @@ -2033,20 +2034,18 @@ } /// Inserts \p SrcReg into the first lane of \p DstReg. - virtual MachineInstr * - spill2RegInsertToVectorReg(Register DstReg, Register SrcReg, - int OperationBits, MachineBasicBlock *MBB, - MachineBasicBlock::iterator InsertBeforeIt, - const TargetRegisterInfo *TRI) const { + virtual MachineInstr *spill2RegInsertToVectorReg( + Register DstReg, Register SrcReg, int OperationBits, + MachineBasicBlock *MBB, MachineBasicBlock::iterator InsertBeforeIt, + const TargetRegisterInfo *TRI, const TargetSubtargetInfo *STI) const { llvm_unreachable("Target does not implement this"); } /// Extracts the first lane of \p SrcReg into \p DstReg. - virtual MachineInstr * - spill2RegExtractFromVectorReg(Register DstReg, Register SrcReg, - int OperationBits, MachineBasicBlock *InsertMBB, - MachineBasicBlock::iterator InsertBeforeIt, - const TargetRegisterInfo *TRI) const { + virtual MachineInstr *spill2RegExtractFromVectorReg( + Register DstReg, Register SrcReg, int OperationBits, + MachineBasicBlock *InsertMBB, MachineBasicBlock::iterator InsertBeforeIt, + const TargetRegisterInfo *TRI, const TargetSubtargetInfo *STI) const { llvm_unreachable("Target does not implement this"); } diff --git a/llvm/lib/CodeGen/Spill2Reg.cpp b/llvm/lib/CodeGen/Spill2Reg.cpp --- a/llvm/lib/CodeGen/Spill2Reg.cpp +++ b/llvm/lib/CodeGen/Spill2Reg.cpp @@ -342,7 +342,7 @@ MachineInstr *SpillToVector = TII->spill2RegInsertToVectorReg( VectorReg, OldReg, SpillData.MemBits, StackSpill->getParent(), - /*InsertBeforeIt=*/StackSpill->getIterator(), TRI); + /*InsertBeforeIt=*/StackSpill->getIterator(), TRI, &MF->getSubtarget()); // Mark VectorReg as live in the instr's BB. LRUs[StackSpill->getParent()].addReg(VectorReg); @@ -359,7 +359,8 @@ MachineInstr *ReloadFromReg = TII->spill2RegExtractFromVectorReg( OldReg, VectorReg, ReloadData.MemBits, StackReload->getParent(), - /*InsertBeforeIt=*/StackReload->getIterator(), TRI); + /*InsertBeforeIt=*/StackReload->getIterator(), TRI, + &MF->getSubtarget()); // Mark VectorReg as live in the instr's BB. LRUs[StackReload->getParent()].addReg(VectorReg); @@ -471,7 +472,7 @@ // Look for a physical register that in LRU. 
llvm::Optional PhysVectorRegOpt = tryGetFreePhysicalReg( - TII->getVectorRegisterClassForSpill2Reg(TRI), LRU); + TII->getVectorRegisterClassForSpill2Reg(TRI, &MF->getSubtarget()), LRU); if (!PhysVectorRegOpt) continue; diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -667,7 +667,8 @@ bool targetSupportsSpill2Reg(const TargetSubtargetInfo *STI) const override; const TargetRegisterClass *getVectorRegisterClassForSpill2Reg( - const TargetRegisterInfo *TRI) const override; + const TargetRegisterInfo *TRI, + const TargetSubtargetInfo *STI) const override; bool isSpill2RegProfitable(const MachineInstr *MI, const TargetRegisterInfo *TRI, @@ -681,13 +682,15 @@ spill2RegInsertToVectorReg(Register DstReg, Register SrcReg, int OperationBits, MachineBasicBlock *MBB, MachineBasicBlock::iterator InsertBeforeIt, - const TargetRegisterInfo *TRI) const override; + const TargetRegisterInfo *TRI, + const TargetSubtargetInfo *STI) const override; MachineInstr * spill2RegExtractFromVectorReg(Register DstReg, Register SrcReg, int OperationBits, MachineBasicBlock *InsertMBB, MachineBasicBlock::iterator InsertBeforeIt, - const TargetRegisterInfo *TRI) const override; + const TargetRegisterInfo *TRI, + const TargetSubtargetInfo *STI) const override; }; } // namespace llvm diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -89,6 +89,10 @@ cl::desc("When checking for profitability, explore up to this many nearby " "instructions.")); +static cl::opt<bool> Spill2RegNoAVX( + "spill2reg-no-avx", cl::Hidden, cl::init(false), + cl::desc("Don't use AVX instructions even if the target supports them.")); + // Pin the vtable to this file. void X86InstrInfo::anchor() {} @@ -9530,10 +9534,16 @@ return X86STI->hasSSE41(); } +static inline bool useAVX(const TargetSubtargetInfo *STI) { + const X86Subtarget *X86STI = static_cast<const X86Subtarget *>(STI); + bool UseAVX = X86STI->hasAVX() && !Spill2RegNoAVX; + return UseAVX; +} + const TargetRegisterClass *X86InstrInfo::getVectorRegisterClassForSpill2Reg( - const TargetRegisterInfo *TRI) const { - const TargetRegisterClass *VecRegClass = - TRI->getRegClass(X86::VR128RegClassID); + const TargetRegisterInfo *TRI, const TargetSubtargetInfo *STI) const { + const TargetRegisterClass *VecRegClass = TRI->getRegClass( + useAVX(STI) ? X86::VR128XRegClassID : X86::VR128RegClassID); return VecRegClass; } @@ -9579,14 +9589,22 @@ return MemHeuristic && VecHeuristic; } -static unsigned getInsertOrExtractOpcode(unsigned Bits, bool Insert) { +static unsigned getInsertOrExtractOpcode(unsigned Bits, bool Insert, + const TargetSubtargetInfo *STI) { + bool UseAVX = useAVX(STI); switch (Bits) { case 8: case 16: case 32: - return Insert ? X86::MOVDI2PDIrr : X86::MOVPDI2DIrr; + if (UseAVX) + return Insert ? X86::VMOVDI2PDIZrr : X86::VMOVPDI2DIZrr; + else + return Insert ? X86::MOVDI2PDIrr : X86::MOVPDI2DIrr; case 64: - return Insert ? X86::MOV64toPQIrr : X86::MOVPQIto64rr; + if (UseAVX) + return Insert ? X86::VMOV64toPQIZrr : X86::VMOVPQIto64Zrr; + else + return Insert ?
X86::MOV64toPQIrr : X86::MOVPQIto64rr; default: llvm_unreachable("Unsupported bits"); } @@ -9624,11 +9642,11 @@ MachineInstr *X86InstrInfo::spill2RegInsertToVectorReg( Register DstReg, Register SrcReg, int OperationBits, MachineBasicBlock *MBB, - MachineBasicBlock::iterator InsertBeforeIt, - const TargetRegisterInfo *TRI) const { + MachineBasicBlock::iterator InsertBeforeIt, const TargetRegisterInfo *TRI, + const TargetSubtargetInfo *STI) const { DebugLoc DL; unsigned InsertOpcode = - getInsertOrExtractOpcode(OperationBits, true /*insert*/); + getInsertOrExtractOpcode(OperationBits, true /*insert*/, STI); const MCInstrDesc &InsertMCID = get(InsertOpcode); // `movd` does not support 8/16 bit operands. Instead, we use a 32-bit // register. For example: @@ -9645,10 +9663,10 @@ MachineInstr *X86InstrInfo::spill2RegExtractFromVectorReg( Register DstReg, Register SrcReg, int OperationBits, MachineBasicBlock *InsertMBB, MachineBasicBlock::iterator InsertBeforeIt, - const TargetRegisterInfo *TRI) const { + const TargetRegisterInfo *TRI, const TargetSubtargetInfo *STI) const { DebugLoc DL; unsigned ExtractOpcode = - getInsertOrExtractOpcode(OperationBits, false /*extract*/); + getInsertOrExtractOpcode(OperationBits, false /*extract*/, STI); const MCInstrDesc &ExtractMCID = get(ExtractOpcode); // `movd` does not support 8/16 bit operands. Instead, we use a 32-bit // register. For example: diff --git a/llvm/test/CodeGen/X86/spill2reg_end_to_end_16bit.ll b/llvm/test/CodeGen/X86/spill2reg_end_to_end_16bit.ll --- a/llvm/test/CodeGen/X86/spill2reg_end_to_end_16bit.ll +++ b/llvm/test/CodeGen/X86/spill2reg_end_to_end_16bit.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+sse4.1 | FileCheck %s +; RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+avx | FileCheck --check-prefix=AVX %s ; End-to-end check that Spill2Reg works with 16-bit registers. 
@@ -130,6 +131,90 @@ ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq +; +; AVX-LABEL: _Z5spillv: +; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rbp +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: pushq %r15 +; AVX-NEXT: .cfi_def_cfa_offset 24 +; AVX-NEXT: pushq %r14 +; AVX-NEXT: .cfi_def_cfa_offset 32 +; AVX-NEXT: pushq %r13 +; AVX-NEXT: .cfi_def_cfa_offset 40 +; AVX-NEXT: pushq %r12 +; AVX-NEXT: .cfi_def_cfa_offset 48 +; AVX-NEXT: pushq %rbx +; AVX-NEXT: .cfi_def_cfa_offset 56 +; AVX-NEXT: .cfi_offset %rbx, -56 +; AVX-NEXT: .cfi_offset %r12, -48 +; AVX-NEXT: .cfi_offset %r13, -40 +; AVX-NEXT: .cfi_offset %r14, -32 +; AVX-NEXT: .cfi_offset %r15, -24 +; AVX-NEXT: .cfi_offset %rbp, -16 +; AVX-NEXT: movw D0(%rip), %ax +; AVX-NEXT: vmovd %eax, %xmm3 +; AVX-NEXT: movzwl D1(%rip), %ecx +; AVX-NEXT: movzwl D2(%rip), %edx +; AVX-NEXT: movzwl D3(%rip), %esi +; AVX-NEXT: movzwl D4(%rip), %edi +; AVX-NEXT: movzwl D5(%rip), %r8d +; AVX-NEXT: movzwl D6(%rip), %r9d +; AVX-NEXT: movzwl D7(%rip), %r10d +; AVX-NEXT: movzwl D8(%rip), %r11d +; AVX-NEXT: movzwl D9(%rip), %ebx +; AVX-NEXT: movzwl D10(%rip), %ebp +; AVX-NEXT: movzwl D11(%rip), %r14d +; AVX-NEXT: movzwl D12(%rip), %r15d +; AVX-NEXT: movzwl D13(%rip), %r12d +; AVX-NEXT: movzwl D14(%rip), %r13d +; AVX-NEXT: movw D15(%rip), %ax +; AVX-NEXT: vmovd %eax, %xmm0 +; AVX-NEXT: movw D16(%rip), %ax +; AVX-NEXT: vmovd %eax, %xmm1 +; AVX-NEXT: movw D17(%rip), %ax +; AVX-NEXT: vmovd %eax, %xmm4 +; AVX-NEXT: movw D18(%rip), %ax +; AVX-NEXT: vmovd %eax, %xmm2 +; AVX-NEXT: #APP +; AVX-NEXT: #NO_APP +; AVX-NEXT: vmovd %xmm3, %eax +; AVX-NEXT: movw %ax, U0(%rip) +; AVX-NEXT: movw %cx, U1(%rip) +; AVX-NEXT: movw %dx, U2(%rip) +; AVX-NEXT: movw %si, U3(%rip) +; AVX-NEXT: movw %di, U4(%rip) +; AVX-NEXT: movw %r8w, U5(%rip) +; AVX-NEXT: movw %r9w, U6(%rip) +; AVX-NEXT: movw %r10w, U7(%rip) +; AVX-NEXT: movw %r11w, U8(%rip) +; AVX-NEXT: movw %bx, U9(%rip) +; AVX-NEXT: movw %bp, U10(%rip) +; AVX-NEXT: movw %r14w, U11(%rip) +; AVX-NEXT: movw %r15w, U12(%rip) +; AVX-NEXT: movw %r12w, U13(%rip) +; AVX-NEXT: movw %r13w, U14(%rip) +; AVX-NEXT: vmovd %xmm0, %eax +; AVX-NEXT: movw %ax, U15(%rip) +; AVX-NEXT: vmovd %xmm1, %eax +; AVX-NEXT: movw %ax, U16(%rip) +; AVX-NEXT: vmovd %xmm4, %eax +; AVX-NEXT: movw %ax, U17(%rip) +; AVX-NEXT: vmovd %xmm2, %eax +; AVX-NEXT: movw %ax, U18(%rip) +; AVX-NEXT: popq %rbx +; AVX-NEXT: .cfi_def_cfa_offset 48 +; AVX-NEXT: popq %r12 +; AVX-NEXT: .cfi_def_cfa_offset 40 +; AVX-NEXT: popq %r13 +; AVX-NEXT: .cfi_def_cfa_offset 32 +; AVX-NEXT: popq %r14 +; AVX-NEXT: .cfi_def_cfa_offset 24 +; AVX-NEXT: popq %r15 +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: popq %rbp +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq entry: %0 = load i16, i16* @D0 %1 = load i16, i16* @D1 diff --git a/llvm/test/CodeGen/X86/spill2reg_end_to_end_32bit.ll b/llvm/test/CodeGen/X86/spill2reg_end_to_end_32bit.ll --- a/llvm/test/CodeGen/X86/spill2reg_end_to_end_32bit.ll +++ b/llvm/test/CodeGen/X86/spill2reg_end_to_end_32bit.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+sse4.1 | FileCheck %s +; RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+sse4.1 | FileCheck --check-prefix=AVX %s ; End-to-end check that Spill2Reg works with 32-bit registers. 
@@ -130,6 +131,90 @@ ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq +; +; AVX-LABEL: _Z5spillv: +; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rbp +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: pushq %r15 +; AVX-NEXT: .cfi_def_cfa_offset 24 +; AVX-NEXT: pushq %r14 +; AVX-NEXT: .cfi_def_cfa_offset 32 +; AVX-NEXT: pushq %r13 +; AVX-NEXT: .cfi_def_cfa_offset 40 +; AVX-NEXT: pushq %r12 +; AVX-NEXT: .cfi_def_cfa_offset 48 +; AVX-NEXT: pushq %rbx +; AVX-NEXT: .cfi_def_cfa_offset 56 +; AVX-NEXT: .cfi_offset %rbx, -56 +; AVX-NEXT: .cfi_offset %r12, -48 +; AVX-NEXT: .cfi_offset %r13, -40 +; AVX-NEXT: .cfi_offset %r14, -32 +; AVX-NEXT: .cfi_offset %r15, -24 +; AVX-NEXT: .cfi_offset %rbp, -16 +; AVX-NEXT: movl D0(%rip), %eax +; AVX-NEXT: movd %eax, %xmm3 +; AVX-NEXT: movl D1(%rip), %ecx +; AVX-NEXT: movl D2(%rip), %edx +; AVX-NEXT: movl D3(%rip), %esi +; AVX-NEXT: movl D4(%rip), %edi +; AVX-NEXT: movl D5(%rip), %r8d +; AVX-NEXT: movl D6(%rip), %r9d +; AVX-NEXT: movl D7(%rip), %r10d +; AVX-NEXT: movl D8(%rip), %r11d +; AVX-NEXT: movl D9(%rip), %ebx +; AVX-NEXT: movl D10(%rip), %ebp +; AVX-NEXT: movl D11(%rip), %r14d +; AVX-NEXT: movl D12(%rip), %r15d +; AVX-NEXT: movl D13(%rip), %r12d +; AVX-NEXT: movl D14(%rip), %r13d +; AVX-NEXT: movl D15(%rip), %eax +; AVX-NEXT: movd %eax, %xmm0 +; AVX-NEXT: movl D16(%rip), %eax +; AVX-NEXT: movd %eax, %xmm1 +; AVX-NEXT: movl D17(%rip), %eax +; AVX-NEXT: movd %eax, %xmm4 +; AVX-NEXT: movl D18(%rip), %eax +; AVX-NEXT: movd %eax, %xmm2 +; AVX-NEXT: #APP +; AVX-NEXT: #NO_APP +; AVX-NEXT: movd %xmm3, %eax +; AVX-NEXT: movl %eax, U0(%rip) +; AVX-NEXT: movl %ecx, U1(%rip) +; AVX-NEXT: movl %edx, U2(%rip) +; AVX-NEXT: movl %esi, U3(%rip) +; AVX-NEXT: movl %edi, U4(%rip) +; AVX-NEXT: movl %r8d, U5(%rip) +; AVX-NEXT: movl %r9d, U6(%rip) +; AVX-NEXT: movl %r10d, U7(%rip) +; AVX-NEXT: movl %r11d, U8(%rip) +; AVX-NEXT: movl %ebx, U9(%rip) +; AVX-NEXT: movl %ebp, U10(%rip) +; AVX-NEXT: movl %r14d, U11(%rip) +; AVX-NEXT: movl %r15d, U12(%rip) +; AVX-NEXT: movl %r12d, U13(%rip) +; AVX-NEXT: movl %r13d, U14(%rip) +; AVX-NEXT: movd %xmm0, %eax +; AVX-NEXT: movl %eax, U15(%rip) +; AVX-NEXT: movd %xmm1, %eax +; AVX-NEXT: movl %eax, U16(%rip) +; AVX-NEXT: movd %xmm4, %eax +; AVX-NEXT: movl %eax, U17(%rip) +; AVX-NEXT: movd %xmm2, %eax +; AVX-NEXT: movl %eax, U18(%rip) +; AVX-NEXT: popq %rbx +; AVX-NEXT: .cfi_def_cfa_offset 48 +; AVX-NEXT: popq %r12 +; AVX-NEXT: .cfi_def_cfa_offset 40 +; AVX-NEXT: popq %r13 +; AVX-NEXT: .cfi_def_cfa_offset 32 +; AVX-NEXT: popq %r14 +; AVX-NEXT: .cfi_def_cfa_offset 24 +; AVX-NEXT: popq %r15 +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: popq %rbp +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq entry: %0 = load i32, i32* @D0 %1 = load i32, i32* @D1 diff --git a/llvm/test/CodeGen/X86/spill2reg_end_to_end_64bit.ll b/llvm/test/CodeGen/X86/spill2reg_end_to_end_64bit.ll --- a/llvm/test/CodeGen/X86/spill2reg_end_to_end_64bit.ll +++ b/llvm/test/CodeGen/X86/spill2reg_end_to_end_64bit.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+sse4.1 | FileCheck %s +; RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+sse4.1 | FileCheck --check-prefix=AVX %s ; End-to-end check that Spill2Reg works with 64-bit registers. 
@@ -130,6 +131,90 @@ ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq +; +; AVX-LABEL: _Z5spillv: +; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rbp +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: pushq %r15 +; AVX-NEXT: .cfi_def_cfa_offset 24 +; AVX-NEXT: pushq %r14 +; AVX-NEXT: .cfi_def_cfa_offset 32 +; AVX-NEXT: pushq %r13 +; AVX-NEXT: .cfi_def_cfa_offset 40 +; AVX-NEXT: pushq %r12 +; AVX-NEXT: .cfi_def_cfa_offset 48 +; AVX-NEXT: pushq %rbx +; AVX-NEXT: .cfi_def_cfa_offset 56 +; AVX-NEXT: .cfi_offset %rbx, -56 +; AVX-NEXT: .cfi_offset %r12, -48 +; AVX-NEXT: .cfi_offset %r13, -40 +; AVX-NEXT: .cfi_offset %r14, -32 +; AVX-NEXT: .cfi_offset %r15, -24 +; AVX-NEXT: .cfi_offset %rbp, -16 +; AVX-NEXT: movq D0(%rip), %rax +; AVX-NEXT: movq %rax, %xmm3 +; AVX-NEXT: movq D1(%rip), %rcx +; AVX-NEXT: movq D2(%rip), %rdx +; AVX-NEXT: movq D3(%rip), %rsi +; AVX-NEXT: movq D4(%rip), %rdi +; AVX-NEXT: movq D5(%rip), %r8 +; AVX-NEXT: movq D6(%rip), %r9 +; AVX-NEXT: movq D7(%rip), %r10 +; AVX-NEXT: movq D8(%rip), %r11 +; AVX-NEXT: movq D9(%rip), %rbx +; AVX-NEXT: movq D10(%rip), %r14 +; AVX-NEXT: movq D11(%rip), %r15 +; AVX-NEXT: movq D12(%rip), %r12 +; AVX-NEXT: movq D13(%rip), %r13 +; AVX-NEXT: movq D14(%rip), %rbp +; AVX-NEXT: movq D15(%rip), %rax +; AVX-NEXT: movq %rax, %xmm0 +; AVX-NEXT: movq D16(%rip), %rax +; AVX-NEXT: movq %rax, %xmm1 +; AVX-NEXT: movq D17(%rip), %rax +; AVX-NEXT: movq %rax, %xmm4 +; AVX-NEXT: movq D18(%rip), %rax +; AVX-NEXT: movq %rax, %xmm2 +; AVX-NEXT: #APP +; AVX-NEXT: #NO_APP +; AVX-NEXT: movq %xmm3, %rax +; AVX-NEXT: movq %rax, U0(%rip) +; AVX-NEXT: movq %rcx, U1(%rip) +; AVX-NEXT: movq %rdx, U2(%rip) +; AVX-NEXT: movq %rsi, U3(%rip) +; AVX-NEXT: movq %rdi, U4(%rip) +; AVX-NEXT: movq %r8, U5(%rip) +; AVX-NEXT: movq %r9, U6(%rip) +; AVX-NEXT: movq %r10, U7(%rip) +; AVX-NEXT: movq %r11, U8(%rip) +; AVX-NEXT: movq %rbx, U9(%rip) +; AVX-NEXT: movq %r14, U10(%rip) +; AVX-NEXT: movq %r15, U11(%rip) +; AVX-NEXT: movq %r12, U12(%rip) +; AVX-NEXT: movq %r13, U13(%rip) +; AVX-NEXT: movq %rbp, U14(%rip) +; AVX-NEXT: movq %xmm0, %rax +; AVX-NEXT: movq %rax, U15(%rip) +; AVX-NEXT: movq %xmm1, %rax +; AVX-NEXT: movq %rax, U16(%rip) +; AVX-NEXT: movq %xmm4, %rax +; AVX-NEXT: movq %rax, U17(%rip) +; AVX-NEXT: movq %xmm2, %rax +; AVX-NEXT: movq %rax, U18(%rip) +; AVX-NEXT: popq %rbx +; AVX-NEXT: .cfi_def_cfa_offset 48 +; AVX-NEXT: popq %r12 +; AVX-NEXT: .cfi_def_cfa_offset 40 +; AVX-NEXT: popq %r13 +; AVX-NEXT: .cfi_def_cfa_offset 32 +; AVX-NEXT: popq %r14 +; AVX-NEXT: .cfi_def_cfa_offset 24 +; AVX-NEXT: popq %r15 +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: popq %rbp +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq entry: %0 = load i64, i64* @D0 %1 = load i64, i64* @D1 diff --git a/llvm/test/CodeGen/X86/spill2reg_end_to_end_8bit.ll b/llvm/test/CodeGen/X86/spill2reg_end_to_end_8bit.ll --- a/llvm/test/CodeGen/X86/spill2reg_end_to_end_8bit.ll +++ b/llvm/test/CodeGen/X86/spill2reg_end_to_end_8bit.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+sse4.1 | FileCheck %s +; RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+avx | FileCheck --check-prefix=AVX %s ; End-to-end check that Spill2Reg works with 8-bit registers. 
@@ -130,6 +131,90 @@ ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq +; +; AVX-LABEL: _Z5spillv: +; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rbp +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: pushq %r15 +; AVX-NEXT: .cfi_def_cfa_offset 24 +; AVX-NEXT: pushq %r14 +; AVX-NEXT: .cfi_def_cfa_offset 32 +; AVX-NEXT: pushq %r13 +; AVX-NEXT: .cfi_def_cfa_offset 40 +; AVX-NEXT: pushq %r12 +; AVX-NEXT: .cfi_def_cfa_offset 48 +; AVX-NEXT: pushq %rbx +; AVX-NEXT: .cfi_def_cfa_offset 56 +; AVX-NEXT: .cfi_offset %rbx, -56 +; AVX-NEXT: .cfi_offset %r12, -48 +; AVX-NEXT: .cfi_offset %r13, -40 +; AVX-NEXT: .cfi_offset %r14, -32 +; AVX-NEXT: .cfi_offset %r15, -24 +; AVX-NEXT: .cfi_offset %rbp, -16 +; AVX-NEXT: movb D0(%rip), %al +; AVX-NEXT: vmovd %eax, %xmm3 +; AVX-NEXT: movb D1(%rip), %cl +; AVX-NEXT: movb D2(%rip), %dl +; AVX-NEXT: movb D3(%rip), %sil +; AVX-NEXT: movb D4(%rip), %dil +; AVX-NEXT: movb D5(%rip), %r8b +; AVX-NEXT: movb D6(%rip), %r9b +; AVX-NEXT: movb D7(%rip), %r10b +; AVX-NEXT: movb D8(%rip), %r11b +; AVX-NEXT: movb D9(%rip), %bl +; AVX-NEXT: movb D10(%rip), %bpl +; AVX-NEXT: movb D11(%rip), %r14b +; AVX-NEXT: movb D12(%rip), %r15b +; AVX-NEXT: movb D13(%rip), %r12b +; AVX-NEXT: movb D14(%rip), %r13b +; AVX-NEXT: movb D15(%rip), %al +; AVX-NEXT: vmovd %eax, %xmm0 +; AVX-NEXT: movb D16(%rip), %al +; AVX-NEXT: vmovd %eax, %xmm1 +; AVX-NEXT: movb D17(%rip), %al +; AVX-NEXT: vmovd %eax, %xmm4 +; AVX-NEXT: movb D18(%rip), %al +; AVX-NEXT: vmovd %eax, %xmm2 +; AVX-NEXT: #APP +; AVX-NEXT: #NO_APP +; AVX-NEXT: vmovd %xmm3, %eax +; AVX-NEXT: movb %al, U0(%rip) +; AVX-NEXT: movb %cl, U1(%rip) +; AVX-NEXT: movb %dl, U2(%rip) +; AVX-NEXT: movb %sil, U3(%rip) +; AVX-NEXT: movb %dil, U4(%rip) +; AVX-NEXT: movb %r8b, U5(%rip) +; AVX-NEXT: movb %r9b, U6(%rip) +; AVX-NEXT: movb %r10b, U7(%rip) +; AVX-NEXT: movb %r11b, U8(%rip) +; AVX-NEXT: movb %bl, U9(%rip) +; AVX-NEXT: movb %bpl, U10(%rip) +; AVX-NEXT: movb %r14b, U11(%rip) +; AVX-NEXT: movb %r15b, U12(%rip) +; AVX-NEXT: movb %r12b, U13(%rip) +; AVX-NEXT: movb %r13b, U14(%rip) +; AVX-NEXT: vmovd %xmm0, %eax +; AVX-NEXT: movb %al, U15(%rip) +; AVX-NEXT: vmovd %xmm1, %eax +; AVX-NEXT: movb %al, U16(%rip) +; AVX-NEXT: vmovd %xmm4, %eax +; AVX-NEXT: movb %al, U17(%rip) +; AVX-NEXT: vmovd %xmm2, %eax +; AVX-NEXT: movb %al, U18(%rip) +; AVX-NEXT: popq %rbx +; AVX-NEXT: .cfi_def_cfa_offset 48 +; AVX-NEXT: popq %r12 +; AVX-NEXT: .cfi_def_cfa_offset 40 +; AVX-NEXT: popq %r13 +; AVX-NEXT: .cfi_def_cfa_offset 32 +; AVX-NEXT: popq %r14 +; AVX-NEXT: .cfi_def_cfa_offset 24 +; AVX-NEXT: popq %r15 +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: popq %rbp +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq entry: %0 = load i8, i8* @D0 %1 = load i8, i8* @D1 diff --git a/llvm/test/CodeGen/X86/spill2reg_simple_1_16bit.mir b/llvm/test/CodeGen/X86/spill2reg_simple_1_16bit.mir --- a/llvm/test/CodeGen/X86/spill2reg_simple_1_16bit.mir +++ b/llvm/test/CodeGen/X86/spill2reg_simple_1_16bit.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+sse4.1 --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=0 -spill2reg-vec-instrs=99999 | FileCheck %s +# RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+avx --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=0 -spill2reg-vec-instrs=99999 | FileCheck --check-prefix=AVX %s # Simple test with a single 16-bit spill-reload pair: # spill stack.0 @@ -30,6 
+31,12 @@ ; CHECK-NEXT: $eax = MOVPDI2DIrr $xmm0 ; CHECK-NEXT: MOV16mr $rip, 1, $noreg, @U0, $noreg, killed renamable $ax :: (store (s16) into @U0) ; CHECK-NEXT: RET 0 + ; AVX-LABEL: name: func + ; AVX: $ax = MOV16rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s16) from @D0) + ; AVX-NEXT: $xmm0 = VMOVDI2PDIZrr $eax + ; AVX-NEXT: $eax = VMOVPDI2DIZrr $xmm0 + ; AVX-NEXT: MOV16mr $rip, 1, $noreg, @U0, $noreg, killed renamable $ax :: (store (s16) into @U0) + ; AVX-NEXT: RET 0 $ax = MOV16rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s16) from @D0) MOV16mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $ax :: (store (s16) into %stack.0) ; reload diff --git a/llvm/test/CodeGen/X86/spill2reg_simple_1_32bit.mir b/llvm/test/CodeGen/X86/spill2reg_simple_1_32bit.mir --- a/llvm/test/CodeGen/X86/spill2reg_simple_1_32bit.mir +++ b/llvm/test/CodeGen/X86/spill2reg_simple_1_32bit.mir @@ -1,6 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+sse4.1 --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=0 -spill2reg-vec-instrs=99999 | FileCheck %s # RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=-sse4.1 --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=0 -spill2reg-vec-instrs=99999 | FileCheck --check-prefix=NOSSE %s +# RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+avx --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=0 -spill2reg-vec-instrs=99999 | FileCheck --check-prefix=AVX %s # Simple test with a single spill-reload pair (32-bit version): # spill stack.0 @@ -24,6 +25,13 @@ body: | + ; AVX-LABEL: bb.0: + ; AVX-NEXT: $eax = MOV32rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s32) from @D0) + ; AVX-NEXT: $xmm0 = VMOVDI2PDIZrr $eax + ; AVX-NEXT: $eax = VMOVPDI2DIZrr $xmm0 + ; AVX-NEXT: MOV32mr $rip, 1, $noreg, @U0, $noreg, killed renamable $eax :: (store (s32) into @U0) + ; AVX-NEXT: RET 0 + bb.0: ; spill ; CHECK-LABEL: name: func diff --git a/llvm/test/CodeGen/X86/spill2reg_simple_1_64bit.mir b/llvm/test/CodeGen/X86/spill2reg_simple_1_64bit.mir --- a/llvm/test/CodeGen/X86/spill2reg_simple_1_64bit.mir +++ b/llvm/test/CodeGen/X86/spill2reg_simple_1_64bit.mir @@ -1,6 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+sse4.1 --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=0 -spill2reg-vec-instrs=99999 | FileCheck %s # RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=-sse4.1 --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=0 -spill2reg-vec-instrs=99999 | FileCheck --check-prefix=NOSSE %s +# RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+avx --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=0 -spill2reg-vec-instrs=99999 | FileCheck --check-prefix=AVX %s +# RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -spill2reg-no-avx -mattr=+avx --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=0 -spill2reg-vec-instrs=99999 | FileCheck --check-prefix=NO_AVX_FLAG %s # Simple test with a single spill-reload pair (64-bit version): # spill stack.0 @@ -38,6 +40,18 @@ ; NOSSE-NEXT: $rax = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %stack.0) ; NOSSE-NEXT: MOV64mr $rip, 1, $noreg, @U0, $noreg, killed renamable $rax :: (store (s64) into @U0) ; NOSSE-NEXT: RET 0 + ; AVX-LABEL: name: func + ; AVX: $rax = 
MOV64rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s64) from @D0) + ; AVX-NEXT: $xmm0 = VMOV64toPQIZrr $rax + ; AVX-NEXT: $rax = VMOVPQIto64Zrr $xmm0 + ; AVX-NEXT: MOV64mr $rip, 1, $noreg, @U0, $noreg, killed renamable $rax :: (store (s64) into @U0) + ; AVX-NEXT: RET 0 + ; NO_AVX_FLAG-LABEL: name: func + ; NO_AVX_FLAG: $rax = MOV64rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s64) from @D0) + ; NO_AVX_FLAG-NEXT: $xmm0 = MOV64toPQIrr $rax + ; NO_AVX_FLAG-NEXT: $rax = MOVPQIto64rr $xmm0 + ; NO_AVX_FLAG-NEXT: MOV64mr $rip, 1, $noreg, @U0, $noreg, killed renamable $rax :: (store (s64) into @U0) + ; NO_AVX_FLAG-NEXT: RET 0 $rax = MOV64rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s64) from @D0) MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $rax :: (store (s64) into %stack.0) ; reload diff --git a/llvm/test/CodeGen/X86/spill2reg_simple_1_8bit.mir b/llvm/test/CodeGen/X86/spill2reg_simple_1_8bit.mir --- a/llvm/test/CodeGen/X86/spill2reg_simple_1_8bit.mir +++ b/llvm/test/CodeGen/X86/spill2reg_simple_1_8bit.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+sse4.1 --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=0 -spill2reg-vec-instrs=99999 | FileCheck %s +# RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+avx --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=0 -spill2reg-vec-instrs=99999 | FileCheck --check-prefix=AVX %s # Simple test with a single 8-bit spill-reload pair: # spill stack.0 @@ -30,6 +31,12 @@ ; CHECK-NEXT: $eax = MOVPDI2DIrr $xmm0 ; CHECK-NEXT: MOV8mr $rip, 1, $noreg, @U0, $noreg, killed renamable $al :: (store (s8) into @U0) ; CHECK-NEXT: RET 0 + ; AVX-LABEL: name: func + ; AVX: $al = MOV8rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s8) from @D0) + ; AVX-NEXT: $xmm0 = VMOVDI2PDIZrr $eax + ; AVX-NEXT: $eax = VMOVPDI2DIZrr $xmm0 + ; AVX-NEXT: MOV8mr $rip, 1, $noreg, @U0, $noreg, killed renamable $al :: (store (s8) into @U0) + ; AVX-NEXT: RET 0 $al = MOV8rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s8) from @D0) MOV8mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $al :: (store (s8) into %stack.0) ; reload
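For reference, below is a minimal standalone sketch of the opcode-selection behaviour this patch adds in getInsertOrExtractOpcode(). It is an illustration only, not part of the patch: the string names stand in for the real X86 opcode enumerators, and the UseAVX flag models useAVX() (hasAVX() and not -spill2reg-no-avx).

#include <initializer_list>
#include <iostream>
#include <string>

// Mirrors the patch's dispatch: when AVX is usable, the EVEX-encoded VMOV*Z
// opcodes (paired with the VR128X class) are chosen; otherwise the SSE MOV*
// opcodes are kept.
static std::string getInsertOrExtractOpcodeName(unsigned Bits, bool Insert,
                                                bool UseAVX) {
  switch (Bits) {
  case 8:
  case 16:
  case 32: // 8/16-bit values are moved through a 32-bit movd.
    if (UseAVX)
      return Insert ? "VMOVDI2PDIZrr" : "VMOVPDI2DIZrr";
    return Insert ? "MOVDI2PDIrr" : "MOVPDI2DIrr";
  case 64:
    if (UseAVX)
      return Insert ? "VMOV64toPQIZrr" : "VMOVPQIto64Zrr";
    return Insert ? "MOV64toPQIrr" : "MOVPQIto64rr";
  default:
    return "<unsupported>";
  }
}

int main() {
  // Print the insert/extract opcode chosen for every supported width, with
  // and without AVX, matching the MIR seen in the tests above.
  for (unsigned Bits : {8u, 16u, 32u, 64u})
    for (bool UseAVX : {false, true})
      std::cout << Bits << "-bit, AVX=" << UseAVX << ": insert "
                << getInsertOrExtractOpcodeName(Bits, /*Insert=*/true, UseAVX)
                << ", extract "
                << getInsertOrExtractOpcodeName(Bits, /*Insert=*/false, UseAVX)
                << "\n";
  return 0;
}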