Index: llvm/trunk/include/llvm/CodeGen/TargetInstrInfo.h
===================================================================
--- llvm/trunk/include/llvm/CodeGen/TargetInstrInfo.h
+++ llvm/trunk/include/llvm/CodeGen/TargetInstrInfo.h
@@ -79,6 +79,13 @@
     return Opc <= TargetOpcode::GENERIC_OP_END;
   }
 
+  // Simple struct describing access to a FrameIndex.
+  struct FrameAccess {
+    const MachineMemOperand *MMO;
+    int FI;
+    FrameAccess(const MachineMemOperand *MMO, int FI) : MMO(MMO), FI(FI) {}
+  };
+
   /// Given a machine instruction descriptor, returns the register
   /// class constraint for OpNum, or NULL.
   const TargetRegisterClass *getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
@@ -246,14 +253,13 @@
   }
 
   /// If the specified machine instruction has a load from a stack slot,
-  /// return true along with the FrameIndex of the loaded stack slot and the
-  /// machine mem operand containing the reference.
+  /// return true along with the FrameIndices of the loaded stack slots and
+  /// the machine mem operands containing the references.
   /// If not, return false. Unlike isLoadFromStackSlot, this returns true for
   /// any instructions that loads from the stack. This is just a hint, as some
   /// cases may be missed.
   virtual bool hasLoadFromStackSlot(const MachineInstr &MI,
-                                    const MachineMemOperand *&MMO,
-                                    int &FrameIndex) const;
+                                    SmallVectorImpl<FrameAccess> &Accesses) const;
 
   /// If the specified machine instruction is a direct
   /// store to a stack slot, return the virtual or physical register number of
@@ -284,14 +290,13 @@
   }
 
   /// If the specified machine instruction has a store to a stack slot,
-  /// return true along with the FrameIndex of the loaded stack slot and the
-  /// machine mem operand containing the reference.
+  /// return true along with the FrameIndices of the stored stack slots and
+  /// the machine mem operands containing the references.
   /// If not, return false. Unlike isStoreToStackSlot,
   /// this returns true for any instructions that stores to the
   /// stack. This is just a hint, as some cases may be missed.
   virtual bool hasStoreToStackSlot(const MachineInstr &MI,
-                                   const MachineMemOperand *&MMO,
-                                   int &FrameIndex) const;
+                                   SmallVectorImpl<FrameAccess> &Accesses) const;
 
   /// Return true if the specified machine instruction
   /// is a copy of one stack slot to another and has no other effect.
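The new hooks report every frame access an instruction makes instead of a single (MMO, FrameIndex) pair. As a minimal sketch of a consumer (not part of this patch; the helper name is illustrative and the inline vector size is arbitrary), summing the bytes an instruction stores to genuine spill slots might look like:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/CodeGen/MachineFrameInfo.h"
    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/CodeGen/TargetInstrInfo.h"
    using namespace llvm;

    // Sum the sizes of all spill-slot stores performed by MI.
    static unsigned spillStoreBytes(const MachineInstr &MI,
                                    const TargetInstrInfo &TII,
                                    const MachineFrameInfo &MFI) {
      SmallVector<TargetInstrInfo::FrameAccess, 2> Accesses;
      unsigned Bytes = 0;
      if (TII.hasStoreToStackSlot(MI, Accesses))
        for (const TargetInstrInfo::FrameAccess &A : Accesses)
          if (MFI.isSpillSlotObjectIndex(A.FI)) // skip non-spill frame objects
            Bytes += A.MMO->getSize();
      return Bytes;
    }

This is the same shape as the getSize lambda the AsmPrinter hunk below introduces.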
Index: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -750,18 +750,28 @@
   const MachineFrameInfo &MFI = MF->getFrameInfo();
   bool Commented = false;
 
+  auto getSize = [&MFI](
+      const SmallVectorImpl<TargetInstrInfo::FrameAccess> &Accesses) {
+    unsigned Size = 0;
+    for (auto &A : Accesses)
+      if (MFI.isSpillSlotObjectIndex(A.FI))
+        Size += A.MMO->getSize();
+    return Size;
+  };
+
   // We assume a single instruction only has a spill or reload, not
   // both.
   const MachineMemOperand *MMO;
+  SmallVector<TargetInstrInfo::FrameAccess, 2> Accesses;
   if (TII->isLoadFromStackSlotPostFE(MI, FI)) {
     if (MFI.isSpillSlotObjectIndex(FI)) {
       MMO = *MI.memoperands_begin();
       CommentOS << MMO->getSize() << "-byte Reload";
       Commented = true;
     }
-  } else if (TII->hasLoadFromStackSlot(MI, MMO, FI)) {
-    if (MFI.isSpillSlotObjectIndex(FI)) {
-      CommentOS << MMO->getSize() << "-byte Folded Reload";
+  } else if (TII->hasLoadFromStackSlot(MI, Accesses)) {
+    if (auto Size = getSize(Accesses)) {
+      CommentOS << Size << "-byte Folded Reload";
       Commented = true;
     }
   } else if (TII->isStoreToStackSlotPostFE(MI, FI)) {
@@ -770,9 +780,9 @@
       CommentOS << MMO->getSize() << "-byte Spill";
       Commented = true;
     }
-  } else if (TII->hasStoreToStackSlot(MI, MMO, FI)) {
-    if (MFI.isSpillSlotObjectIndex(FI)) {
-      CommentOS << MMO->getSize() << "-byte Folded Spill";
+  } else if (TII->hasStoreToStackSlot(MI, Accesses)) {
+    if (auto Size = getSize(Accesses)) {
+      CommentOS << Size << "-byte Folded Spill";
       Commented = true;
     }
   }
Index: llvm/trunk/lib/CodeGen/LiveDebugValues.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/LiveDebugValues.cpp
+++ llvm/trunk/lib/CodeGen/LiveDebugValues.cpp
@@ -470,7 +470,7 @@
                                  MachineFunction *MF, unsigned &Reg) {
   const MachineFrameInfo &FrameInfo = MF->getFrameInfo();
   int FI;
-  const MachineMemOperand *MMO;
+  SmallVector<TargetInstrInfo::FrameAccess, 1> Accesses;
 
   // TODO: Handle multiple stores folded into one.
   if (!MI.hasOneMemOperand())
@@ -478,7 +478,7 @@
 
   // To identify a spill instruction, use the same criteria as in AsmPrinter.
   if (!((TII->isStoreToStackSlotPostFE(MI, FI) ||
-         TII->hasStoreToStackSlot(MI, MMO, FI)) &&
+         TII->hasStoreToStackSlot(MI, Accesses)) &&
        FrameInfo.isSpillSlotObjectIndex(FI)))
     return false;
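Both call sites above identify a spill with the same two-step criterion: ask the target's post-frame-elimination hook first, then fall back to the generic memoperand scan. A hedged sketch of that predicate as a standalone helper (hypothetical; llvm::any_of comes from llvm/ADT/STLExtras.h, other includes as in the previous sketch), matching the filter the RegAllocGreedy hunk below applies:

    // True if MI stores to at least one genuine spill slot.
    static bool storesToSpillSlot(const MachineInstr &MI,
                                  const TargetInstrInfo &TII,
                                  const MachineFrameInfo &MFI) {
      int FI;
      if (TII.isStoreToStackSlotPostFE(MI, FI))
        return MFI.isSpillSlotObjectIndex(FI);
      SmallVector<TargetInstrInfo::FrameAccess, 2> Accesses;
      return TII.hasStoreToStackSlot(MI, Accesses) &&
             llvm::any_of(Accesses,
                          [&MFI](const TargetInstrInfo::FrameAccess &A) {
                            return MFI.isSpillSlotObjectIndex(A.FI);
                          });
    }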
Index: llvm/trunk/lib/CodeGen/RegAllocGreedy.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/RegAllocGreedy.cpp
+++ llvm/trunk/lib/CodeGen/RegAllocGreedy.cpp
@@ -3120,18 +3120,24 @@
   // Handle blocks that were not included in subloops.
   if (Loops->getLoopFor(MBB) == L)
     for (MachineInstr &MI : *MBB) {
-      const MachineMemOperand *MMO;
+      SmallVector<TargetInstrInfo::FrameAccess, 2> Accesses;
       if (TII->isLoadFromStackSlot(MI, FI) &&
           MFI.isSpillSlotObjectIndex(FI))
        ++Reloads;
-      else if (TII->hasLoadFromStackSlot(MI, MMO, FI) &&
-               MFI.isSpillSlotObjectIndex(FI))
+      else if (TII->hasLoadFromStackSlot(MI, Accesses) &&
+               llvm::any_of(Accesses,
+                            [&MFI](const TargetInstrInfo::FrameAccess &A) {
+                              return MFI.isSpillSlotObjectIndex(A.FI);
+                            }))
        ++FoldedReloads;
      else if (TII->isStoreToStackSlot(MI, FI) &&
               MFI.isSpillSlotObjectIndex(FI))
        ++Spills;
-      else if (TII->hasStoreToStackSlot(MI, MMO, FI) &&
-               MFI.isSpillSlotObjectIndex(FI))
+      else if (TII->hasStoreToStackSlot(MI, Accesses) &&
+               llvm::any_of(Accesses,
+                            [&MFI](const TargetInstrInfo::FrameAccess &A) {
+                              return MFI.isSpillSlotObjectIndex(A.FI);
+                            }))
         ++FoldedSpills;
     }
Index: llvm/trunk/lib/CodeGen/TargetInstrInfo.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/TargetInstrInfo.cpp
+++ llvm/trunk/lib/CodeGen/TargetInstrInfo.cpp
@@ -339,42 +339,37 @@
   return MadeChange;
 }
 
-bool TargetInstrInfo::hasLoadFromStackSlot(const MachineInstr &MI,
-                                           const MachineMemOperand *&MMO,
-                                           int &FrameIndex) const {
+bool TargetInstrInfo::hasLoadFromStackSlot(
+    const MachineInstr &MI, SmallVectorImpl<FrameAccess> &Accesses) const {
+
+  size_t StartSize = Accesses.size();
   for (MachineInstr::mmo_iterator o = MI.memoperands_begin(),
                                   oe = MI.memoperands_end();
        o != oe; ++o) {
     if ((*o)->isLoad()) {
       if (const FixedStackPseudoSourceValue *Value =
               dyn_cast_or_null<FixedStackPseudoSourceValue>(
-                  (*o)->getPseudoValue())) {
-        FrameIndex = Value->getFrameIndex();
-        MMO = *o;
-        return true;
-      }
+                  (*o)->getPseudoValue()))
+        Accesses.emplace_back(*o, Value->getFrameIndex());
     }
   }
-  return false;
+  return Accesses.size() != StartSize;
 }
 
-bool TargetInstrInfo::hasStoreToStackSlot(const MachineInstr &MI,
-                                          const MachineMemOperand *&MMO,
-                                          int &FrameIndex) const {
+bool TargetInstrInfo::hasStoreToStackSlot(
+    const MachineInstr &MI, SmallVectorImpl<FrameAccess> &Accesses) const {
+  size_t StartSize = Accesses.size();
   for (MachineInstr::mmo_iterator o = MI.memoperands_begin(),
                                   oe = MI.memoperands_end();
        o != oe; ++o) {
     if ((*o)->isStore()) {
       if (const FixedStackPseudoSourceValue *Value =
               dyn_cast_or_null<FixedStackPseudoSourceValue>(
-                  (*o)->getPseudoValue())) {
-        FrameIndex = Value->getFrameIndex();
-        MMO = *o;
-        return true;
-      }
+                  (*o)->getPseudoValue()))
+        Accesses.emplace_back(*o, Value->getFrameIndex());
     }
   }
-  return false;
+  return Accesses.size() != StartSize;
 }
 
 bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC,
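Note that the default implementations append to Accesses rather than clearing it, and signal success by comparing against the size on entry, so one vector can accumulate results across calls; the Hexagon bundle overrides below rely on exactly this when they pass the same vector through recursive calls. A small sketch of that contract (fragment only; assumes TII and a MachineBasicBlock MBB are in scope, plus <cassert>):

    // Collect the frame accesses of every instruction in a block into a
    // single vector. Each call may append zero or more entries; the return
    // value only says whether *this* call added any.
    SmallVector<TargetInstrInfo::FrameAccess, 4> Accesses;
    for (const MachineInstr &MI : MBB) {
      size_t Before = Accesses.size();
      if (TII->hasLoadFromStackSlot(MI, Accesses))
        assert(Accesses.size() > Before && "true implies new entries");
    }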
Index: llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1172,8 +1172,12 @@
 
 unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
                                                     int &FrameIndex) const {
-  const MachineMemOperand *Dummy;
-  return MI.mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
+  SmallVector<FrameAccess, 1> Accesses;
+  if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses)) {
+    FrameIndex = Accesses.begin()->FI;
+    return true;
+  }
+  return false;
 }
 
 void ARMBaseInstrInfo::
@@ -1386,8 +1390,12 @@
 
 unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
                                                      int &FrameIndex) const {
-  const MachineMemOperand *Dummy;
-  return MI.mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
+  SmallVector<FrameAccess, 1> Accesses;
+  if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses)) {
+    FrameIndex = Accesses.begin()->FI;
+    return true;
+  }
+  return false;
 }
 
 /// Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMID_UPD
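The post-frame-elimination hooks here (and the Lanai/X86 ones below) keep their old single-FrameIndex contract by reporting only the first recorded access; any further accesses are dropped at this boundary. The shared shape, as a hypothetical free function (same includes as the first sketch):

    // Adapt the multi-access query back to the one-index interface.
    static bool firstLoadFrameIndex(const TargetInstrInfo &TII,
                                    const MachineInstr &MI, int &FrameIndex) {
      SmallVector<TargetInstrInfo::FrameAccess, 1> Accesses;
      if (!TII.hasLoadFromStackSlot(MI, Accesses))
        return false;
      FrameIndex = Accesses.begin()->FI; // first access wins
      return true;
    }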
Index: llvm/trunk/lib/Target/Hexagon/HexagonInstrInfo.h
===================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonInstrInfo.h
+++ llvm/trunk/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -69,16 +69,16 @@
   /// Check if the instruction or the bundle of instructions has
   /// load from stack slots. Return the frameindex and machine memory operand
   /// if true.
-  bool hasLoadFromStackSlot(const MachineInstr &MI,
-                            const MachineMemOperand *&MMO,
-                            int &FrameIndex) const override;
+  bool hasLoadFromStackSlot(
+      const MachineInstr &MI,
+      SmallVectorImpl<FrameAccess> &Accesses) const override;
 
   /// Check if the instruction or the bundle of instructions has
   /// store to stack slots. Return the frameindex and machine memory operand
   /// if true.
-  bool hasStoreToStackSlot(const MachineInstr &MI,
-                           const MachineMemOperand *&MMO,
-                           int &FrameIndex) const override;
+  bool hasStoreToStackSlot(
+      const MachineInstr &MI,
+      SmallVectorImpl<FrameAccess> &Accesses) const override;
 
   /// Analyze the branching code at the end of MBB, returning
   /// true if it cannot be understood (e.g. it's a switch dispatch or isn't
Index: llvm/trunk/lib/Target/Hexagon/HexagonInstrInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ llvm/trunk/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -335,37 +335,37 @@
 /// This function checks if the instruction or bundle of instructions
 /// has load from stack slot and returns frameindex and machine memory
 /// operand of that instruction if true.
-bool HexagonInstrInfo::hasLoadFromStackSlot(const MachineInstr &MI,
-                                            const MachineMemOperand *&MMO,
-                                            int &FrameIndex) const {
+bool HexagonInstrInfo::hasLoadFromStackSlot(
+    const MachineInstr &MI,
+    SmallVectorImpl<FrameAccess> &Accesses) const {
   if (MI.isBundle()) {
     const MachineBasicBlock *MBB = MI.getParent();
     MachineBasicBlock::const_instr_iterator MII = MI.getIterator();
     for (++MII; MII != MBB->instr_end() && MII->isInsideBundle(); ++MII)
-      if (TargetInstrInfo::hasLoadFromStackSlot(*MII, MMO, FrameIndex))
+      if (TargetInstrInfo::hasLoadFromStackSlot(*MII, Accesses))
         return true;
     return false;
   }
 
-  return TargetInstrInfo::hasLoadFromStackSlot(MI, MMO, FrameIndex);
+  return TargetInstrInfo::hasLoadFromStackSlot(MI, Accesses);
 }
 
 /// This function checks if the instruction or bundle of instructions
 /// has store to stack slot and returns frameindex and machine memory
 /// operand of that instruction if true.
-bool HexagonInstrInfo::hasStoreToStackSlot(const MachineInstr &MI,
-                                           const MachineMemOperand *&MMO,
-                                           int &FrameIndex) const {
+bool HexagonInstrInfo::hasStoreToStackSlot(
+    const MachineInstr &MI,
+    SmallVectorImpl<FrameAccess> &Accesses) const {
   if (MI.isBundle()) {
     const MachineBasicBlock *MBB = MI.getParent();
     MachineBasicBlock::const_instr_iterator MII = MI.getIterator();
     for (++MII; MII != MBB->instr_end() && MII->isInsideBundle(); ++MII)
-      if (TargetInstrInfo::hasStoreToStackSlot(*MII, MMO, FrameIndex))
+      if (TargetInstrInfo::hasStoreToStackSlot(*MII, Accesses))
        return true;
    return false;
  }
 
-  return TargetInstrInfo::hasStoreToStackSlot(MI, MMO, FrameIndex);
+  return TargetInstrInfo::hasStoreToStackSlot(MI, Accesses);
 }
 
 /// This function can analyze one/two way branching only and should (mostly) be
Index: llvm/trunk/lib/Target/Lanai/LanaiInstrInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/Lanai/LanaiInstrInfo.cpp
+++ llvm/trunk/lib/Target/Lanai/LanaiInstrInfo.cpp
@@ -733,8 +733,11 @@
     if ((Reg = isLoadFromStackSlot(MI, FrameIndex)))
       return Reg;
     // Check for post-frame index elimination operations
-    const MachineMemOperand *Dummy;
-    return hasLoadFromStackSlot(MI, Dummy, FrameIndex);
+    SmallVector<FrameAccess, 1> Accesses;
+    if (hasLoadFromStackSlot(MI, Accesses)) {
+      FrameIndex = Accesses.begin()->FI;
+      return 1;
+    }
   }
   return 0;
 }
Index: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
@@ -411,8 +411,11 @@
     if ((Reg = isLoadFromStackSlot(MI, FrameIndex)))
       return Reg;
     // Check for post-frame index elimination operations
-    const MachineMemOperand *Dummy;
-    return hasLoadFromStackSlot(MI, Dummy, FrameIndex);
+    SmallVector<FrameAccess, 1> Accesses;
+    if (hasLoadFromStackSlot(MI, Accesses)) {
+      FrameIndex = Accesses.begin()->FI;
+      return 1;
+    }
   }
   return 0;
 }
@@ -441,8 +444,11 @@
     if ((Reg = isStoreToStackSlot(MI, FrameIndex)))
       return Reg;
     // Check for post-frame index elimination operations
-    const MachineMemOperand *Dummy;
-    return hasStoreToStackSlot(MI, Dummy, FrameIndex);
+    SmallVector<FrameAccess, 1> Accesses;
+    if (hasStoreToStackSlot(MI, Accesses)) {
+      FrameIndex = Accesses.begin()->FI;
+      return 1;
+    }
   }
   return 0;
 }
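The AArch64 test updates that follow are the observable effect of the AsmPrinter change: an stp or ldp of two 64-bit registers carries two 8-byte memoperands on one instruction, and the folded spill/reload comment now sums all spill-slot accesses instead of printing only the first:

    stp x19, x30, [sp, #16]  =>  8 bytes (x19) + 8 bytes (x30) = 16-byte Folded Spill

Single-register str/ldr spills keep their 8-byte comments.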
Index: llvm/trunk/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll
+++ llvm/trunk/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll
@@ -206,11 +206,11 @@
 ; CHECK-NEXT:    eor w8, w0, w1
 ; CHECK-NEXT:    and w20, w8, #0xffff00
 ; CHECK-NEXT:    mov w0, w20
-; CHECK-NEXT:    stp x19, x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x19, x30, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov w19, w1
 ; CHECK-NEXT:    bl use32
 ; CHECK-NEXT:    eor w0, w20, w19
-; CHECK-NEXT:    ldp x19, x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp x19, x30, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr x20, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
   %n0 = xor i32 %x, %y
@@ -225,12 +225,12 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x20, [sp, #-32]! // 8-byte Folded Spill
 ; CHECK-NEXT:    eor w0, w0, w1
-; CHECK-NEXT:    stp x19, x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x19, x30, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov w19, w1
 ; CHECK-NEXT:    and w20, w0, #0xffff00
 ; CHECK-NEXT:    bl use32
 ; CHECK-NEXT:    eor w0, w20, w19
-; CHECK-NEXT:    ldp x19, x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp x19, x30, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr x20, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
   %n0 = xor i32 %x, %y
Index: llvm/trunk/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbits.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbits.ll
+++ llvm/trunk/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbits.ll
@@ -212,11 +212,11 @@
 ; CHECK-NEXT:    eor w8, w0, w1
 ; CHECK-NEXT:    and w20, w8, #0x55555555
 ; CHECK-NEXT:    mov w0, w20
-; CHECK-NEXT:    stp x19, x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x19, x30, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov w19, w1
 ; CHECK-NEXT:    bl use32
 ; CHECK-NEXT:    eor w0, w20, w19
-; CHECK-NEXT:    ldp x19, x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp x19, x30, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr x20, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
   %n0 = xor i32 %x, %y
@@ -231,12 +231,12 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x20, [sp, #-32]! // 8-byte Folded Spill
 ; CHECK-NEXT:    eor w0, w0, w1
-; CHECK-NEXT:    stp x19, x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x19, x30, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov w19, w1
 ; CHECK-NEXT:    and w20, w0, #0x55555555
 ; CHECK-NEXT:    bl use32
 ; CHECK-NEXT:    eor w0, w20, w19
-; CHECK-NEXT:    ldp x19, x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp x19, x30, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr x20, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
   %n0 = xor i32 %x, %y
Index: llvm/trunk/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll
+++ llvm/trunk/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll
@@ -208,11 +208,11 @@
 ; CHECK-NEXT:    eor w8, w0, w1
 ; CHECK-NEXT:    and w20, w8, #0xf0f0f0f
 ; CHECK-NEXT:    mov w0, w20
-; CHECK-NEXT:    stp x19, x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x19, x30, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov w19, w1
 ; CHECK-NEXT:    bl use32
 ; CHECK-NEXT:    eor w0, w20, w19
-; CHECK-NEXT:    ldp x19, x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp x19, x30, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr x20, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
   %n0 = xor i32 %x, %y
@@ -227,12 +227,12 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x20, [sp, #-32]! // 8-byte Folded Spill
 ; CHECK-NEXT:    eor w0, w0, w1
-; CHECK-NEXT:    stp x19, x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x19, x30, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov w19, w1
 ; CHECK-NEXT:    and w20, w0, #0xf0f0f0f
 ; CHECK-NEXT:    bl use32
 ; CHECK-NEXT:    eor w0, w20, w19
-; CHECK-NEXT:    ldp x19, x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp x19, x30, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr x20, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
   %n0 = xor i32 %x, %y
Index: llvm/trunk/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-lowhigh.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-lowhigh.ll
+++ llvm/trunk/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-lowhigh.ll
@@ -201,11 +201,11 @@
 ; CHECK-NEXT:    eor w8, w0, w1
 ; CHECK-NEXT:    and w20, w8, #0xffff
 ; CHECK-NEXT:    mov w0, w20
-; CHECK-NEXT:    stp x19, x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x19, x30, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov w19, w1
 ; CHECK-NEXT:    bl use32
 ; CHECK-NEXT:    eor w0, w20, w19
-; CHECK-NEXT:    ldp x19, x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp x19, x30, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr x20, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
   %n0 = xor i32 %x, %y
@@ -220,12 +220,12 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x20, [sp, #-32]! // 8-byte Folded Spill
 ; CHECK-NEXT:    eor w0, w0, w1
-; CHECK-NEXT:    stp x19, x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x19, x30, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov w19, w1
 ; CHECK-NEXT:    and w20, w0, #0xffff
 ; CHECK-NEXT:    bl use32
 ; CHECK-NEXT:    eor w0, w20, w19
-; CHECK-NEXT:    ldp x19, x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp x19, x30, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr x20, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
   %n0 = xor i32 %x, %y
Index: llvm/trunk/test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll
+++ llvm/trunk/test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll
@@ -558,11 +558,11 @@
 ; CHECK-NEXT:    eor w8, w0, w1
 ; CHECK-NEXT:    and w20, w8, w3
 ; CHECK-NEXT:    mov w0, w20
-; CHECK-NEXT:    stp x19, x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x19, x30, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov w19, w1
 ; CHECK-NEXT:    bl use32
 ; CHECK-NEXT:    eor w0, w20, w19
-; CHECK-NEXT:    ldp x19, x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp x19, x30, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr x20, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
   %n0 = xor i32 %x, %y
@@ -576,12 +576,12 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x20, [sp, #-32]! // 8-byte Folded Spill
 ; CHECK-NEXT:    eor w0, w0, w1
-; CHECK-NEXT:    stp x19, x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x19, x30, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov w19, w1
 ; CHECK-NEXT:    and w20, w0, w3
 ; CHECK-NEXT:    bl use32
 ; CHECK-NEXT:    eor w0, w20, w19
-; CHECK-NEXT:    ldp x19, x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp x19, x30, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr x20, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
   %n0 = xor i32 %x, %y
Index: llvm/trunk/test/CodeGen/AArch64/vec-libcalls.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/vec-libcalls.ll
+++ llvm/trunk/test/CodeGen/AArch64/vec-libcalls.ll
@@ -141,8 +141,8 @@
 ; CHECK-LABEL: sin_v5f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str d12, [sp, #-48]! // 8-byte Folded Spill
-; CHECK-NEXT:    stp d11, d10, [sp, #8] // 8-byte Folded Spill
-; CHECK-NEXT:    stp d9, d8, [sp, #24] // 8-byte Folded Spill
+; CHECK-NEXT:    stp d11, d10, [sp, #8] // 16-byte Folded Spill
+; CHECK-NEXT:    stp d9, d8, [sp, #24] // 16-byte Folded Spill
 ; CHECK-NEXT:    str x30, [sp, #40] // 8-byte Folded Spill
 ; CHECK-NEXT:    mov v8.16b, v4.16b
 ; CHECK-NEXT:    mov v9.16b, v3.16b
@@ -165,8 +165,8 @@
 ; CHECK-NEXT:    mov v2.16b, v10.16b
 ; CHECK-NEXT:    mov v3.16b, v9.16b
 ; CHECK-NEXT:    ldr x30, [sp, #40] // 8-byte Folded Reload
-; CHECK-NEXT:    ldp d9, d8, [sp, #24] // 8-byte Folded Reload
-; CHECK-NEXT:    ldp d11, d10, [sp, #8] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp d9, d8, [sp, #24] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d11, d10, [sp, #8] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov v4.16b, v0.16b
 ; CHECK-NEXT:    mov v0.16b, v12.16b
 ; CHECK-NEXT:    ldr d12, [sp], #48 // 8-byte Folded Reload
@@ -178,9 +178,9 @@
 define <6 x float> @sin_v6f32(<6 x float> %x) nounwind {
 ; CHECK-LABEL: sin_v6f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    stp d13, d12, [sp, #-64]! // 8-byte Folded Spill
-; CHECK-NEXT:    stp d11, d10, [sp, #16] // 8-byte Folded Spill
-; CHECK-NEXT:    stp d9, d8, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT:    stp d13, d12, [sp, #-64]! // 16-byte Folded Spill
+; CHECK-NEXT:    stp d11, d10, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    stp d9, d8, [sp, #32] // 16-byte Folded Spill
 ; CHECK-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
 ; CHECK-NEXT:    mov v8.16b, v5.16b
 ; CHECK-NEXT:    mov v9.16b, v4.16b
@@ -207,12 +207,12 @@
 ; CHECK-NEXT:    mov v3.16b, v10.16b
 ; CHECK-NEXT:    mov v4.16b, v9.16b
 ; CHECK-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
-; CHECK-NEXT:    ldp d9, d8, [sp, #32] // 8-byte Folded Reload
-; CHECK-NEXT:    ldp d11, d10, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp d9, d8, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d11, d10, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov v5.16b, v0.16b
 ; CHECK-NEXT:    mov v0.16b, v13.16b
 ; CHECK-NEXT:    mov v1.16b, v12.16b
-; CHECK-NEXT:    ldp d13, d12, [sp], #64 // 8-byte Folded Reload
+; CHECK-NEXT:    ldp d13, d12, [sp], #64 // 16-byte Folded Reload
 ; CHECK-NEXT:    ret
   %r = call <6 x float> @llvm.sin.v6f32(<6 x float> %x)
   ret <6 x float> %r
@@ -222,7 +222,7 @@
 ; CHECK-LABEL: sin_v3f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str d10, [sp, #-32]! // 8-byte Folded Spill
-; CHECK-NEXT:    stp d9, d8, [sp, #8] // 8-byte Folded Spill
+; CHECK-NEXT:    stp d9, d8, [sp, #8] // 16-byte Folded Spill
 ; CHECK-NEXT:    str x30, [sp, #24] // 8-byte Folded Spill
 ; CHECK-NEXT:    mov v8.16b, v2.16b
 ; CHECK-NEXT:    mov v9.16b, v1.16b
@@ -235,7 +235,7 @@
 ; CHECK-NEXT:    bl sin
 ; CHECK-NEXT:    mov v1.16b, v9.16b
 ; CHECK-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
-; CHECK-NEXT:    ldp d9, d8, [sp, #8] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp d9, d8, [sp, #8] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov v2.16b, v0.16b
 ; CHECK-NEXT:    mov v0.16b, v10.16b
 ; CHECK-NEXT:    ldr d10, [sp], #32 // 8-byte Folded Reload