Index: lib/Target/Mips/MipsTargetMachine.cpp
===================================================================
--- lib/Target/Mips/MipsTargetMachine.cpp
+++ lib/Target/Mips/MipsTargetMachine.cpp
@@ -68,7 +68,8 @@
 
   // 8 and 16 bit integers only need to have natural alignment, but try to
   // align them to 32 bits. 64 bit integers have natural alignment.
-  Ret += "-i8:8:32-i16:16:32-i64:64";
+  // 128 bit integers are always aligned to 128 bits.
+  Ret += "-i8:8:32-i16:16:32-i64:64-i128:128";
 
   // 32 bit registers are always available and the stack is at least 64 bit
   // aligned. On N64 64 bit registers are also available and the stack is
Index: lib/Target/PowerPC/PPCTargetMachine.cpp
===================================================================
--- lib/Target/PowerPC/PPCTargetMachine.cpp
+++ lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -107,6 +107,9 @@
   else
     Ret += "-f64:32:64";
 
+  // i128 is considered to always be aligned to 16 bytes
+  Ret += "-i128:128";
+
   // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones.
   if (is64Bit)
     Ret += "-n32:64";
Index: lib/Target/Sparc/SparcTargetMachine.cpp
===================================================================
--- lib/Target/Sparc/SparcTargetMachine.cpp
+++ lib/Target/Sparc/SparcTargetMachine.cpp
@@ -36,8 +36,8 @@
   if (!is64Bit)
     Ret += "-p:32:32";
 
-  // Alignments for 64 bit integers.
-  Ret += "-i64:64";
+  // Alignments for 64 bit and 128 bit integers.
+  Ret += "-i64:64-i128:128";
 
   // On SparcV9 128 floats are aligned to 128 bits, on others only to 64.
   // On SparcV9 registers can hold 64 or 32 bits, on others only 32.
Index: lib/Target/SystemZ/SystemZTargetMachine.cpp
===================================================================
--- lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -64,6 +64,9 @@
   // 64-bit integers are naturally aligned.
   Ret += "-i64:64";
 
+  // 128-bit integers are always aligned to 16 bytes.
+  Ret += "-i128:128";
+
   // 128-bit floats are aligned only to 64 bits.
   Ret += "-f128:64";
 
Index: lib/Target/X86/X86TargetMachine.cpp
===================================================================
--- lib/Target/X86/X86TargetMachine.cpp
+++ lib/Target/X86/X86TargetMachine.cpp
@@ -80,6 +80,9 @@
   else
     Ret += "-f64:32:64";
 
+  // i128 is considered to always be 16-byte aligned
+  Ret += "-i128:128";
+
   // Some ABIs align long double to 128 bits, others to 32.
   if (TT.isOSNaCl() || TT.isOSIAMCU())
     ; // No f80
Index: test/CodeGen/Mips/llvm-ir/add.ll
===================================================================
--- test/CodeGen/Mips/llvm-ir/add.ll
+++ test/CodeGen/Mips/llvm-ir/add.ll
@@ -126,16 +126,16 @@
 entry:
 ; ALL-LABEL: add_i128:
 
-  ; GP32: lw $[[T0:[0-9]+]], 28($sp)
+  ; GP32: lw $[[T0:[0-9]+]], 44($fp)
   ; GP32: addu $[[T1:[0-9]+]], $7, $[[T0]]
   ; GP32: sltu $[[T2:[0-9]+]], $[[T1]], $[[T0]]
-  ; GP32: lw $[[T3:[0-9]+]], 24($sp)
+  ; GP32: lw $[[T3:[0-9]+]], 40($fp)
   ; GP32: addu $[[T4:[0-9]+]], $[[T2]], $[[T3]]
   ; GP32: addu $[[T5:[0-9]+]], $6, $[[T4]]
   ; GP32: sltu $[[T6:[0-9]+]], $[[T5]], $[[T3]]
-  ; GP32: lw $[[T7:[0-9]+]], 20($sp)
+  ; GP32: lw $[[T7:[0-9]+]], 36($fp)
   ; GP32: addu $[[T8:[0-9]+]], $[[T6]], $[[T7]]
-  ; GP32: lw $[[T9:[0-9]+]], 16($sp)
+  ; GP32: lw $[[T9:[0-9]+]], 32($fp)
   ; GP32: addu $3, $5, $[[T8]]
   ; GP32: sltu $[[T10:[0-9]+]], $3, $[[T7]]
   ; GP32: addu $[[T11:[0-9]+]], $[[T10]], $[[T9]]
@@ -148,16 +148,16 @@
   ; GP64: daddu $[[T1:[0-9]+]], $[[T0]], $6
   ; GP64: daddu $2, $4, $[[T1]]
 
-  ; MM32: lw $[[T0:[0-9]+]], 28($sp)
+  ; MM32: lw $[[T0:[0-9]+]], 44($fp)
   ; MM32: addu $[[T1:[0-9]+]], $7, $[[T0]]
   ; MM32: sltu $[[T2:[0-9]+]], $[[T1]], $[[T0]]
-  ; MM32: lw $[[T3:[0-9]+]], 24($sp)
+  ; MM32: lw $[[T3:[0-9]+]], 40($fp)
   ; MM32: addu $[[T4:[0-9]+]], $[[T2]], $[[T3]]
   ; MM32: addu $[[T5:[0-9]+]], $6, $[[T4]]
   ; MM32: sltu $[[T6:[0-9]+]], $[[T5]], $[[T3]]
-  ; MM32: lw $[[T7:[0-9]+]], 20($sp)
+  ; MM32: lw $[[T7:[0-9]+]], 36($fp)
   ; MM32: addu $[[T8:[0-9]+]], $[[T6]], $[[T7]]
-  ; MM32: lw $[[T9:[0-9]+]], 16($sp)
+  ; MM32: lw $[[T9:[0-9]+]], 32($fp)
   ; MM32: addu $[[T10:[0-9]+]], $5, $[[T8]]
   ; MM32: sltu $[[T11:[0-9]+]], $[[T10]], $[[T7]]
   ; MM32: addu $[[T12:[0-9]+]], $[[T11]], $[[T9]]
Index: test/CodeGen/Mips/llvm-ir/and.ll
===================================================================
--- test/CodeGen/Mips/llvm-ir/and.ll
+++ test/CodeGen/Mips/llvm-ir/and.ll
@@ -119,25 +119,25 @@
 entry:
 ; ALL-LABEL: and_i128:
 
-  ; GP32: lw $[[T0:[0-9]+]], 20($sp)
-  ; GP32: lw $[[T1:[0-9]+]], 16($sp)
+  ; GP32: lw $[[T0:[0-9]+]], 36($fp)
+  ; GP32: lw $[[T1:[0-9]+]], 32($fp)
   ; GP32: and $2, $4, $[[T1]]
   ; GP32: and $3, $5, $[[T0]]
-  ; GP32: lw $[[T2:[0-9]+]], 24($sp)
+  ; GP32: lw $[[T2:[0-9]+]], 40($fp)
   ; GP32: and $4, $6, $[[T2]]
-  ; GP32: lw $[[T3:[0-9]+]], 28($sp)
+  ; GP32: lw $[[T3:[0-9]+]], 44($fp)
   ; GP32: and $5, $7, $[[T3]]
 
   ; GP64: and $2, $4, $6
   ; GP64: and $3, $5, $7
 
-  ; MM32: lw $[[T0:[0-9]+]], 20($sp)
-  ; MM32: lw $[[T1:[0-9]+]], 16($sp)
+  ; MM32: lw $[[T0:[0-9]+]], 36($fp)
+  ; MM32: lw $[[T1:[0-9]+]], 32($fp)
   ; MM32: and16 $[[T1]], $4
   ; MM32: and16 $[[T0]], $5
-  ; MM32: lw $[[T2:[0-9]+]], 24($sp)
+  ; MM32: lw $[[T2:[0-9]+]], 40($fp)
   ; MM32: and16 $[[T2]], $6
-  ; MM32: lw $[[T3:[0-9]+]], 28($sp)
+  ; MM32: lw $[[T3:[0-9]+]], 44($fp)
   ; MM32: and16 $[[T3]], $7
 
   ; MM64: and $2, $4, $6
Index: test/CodeGen/Mips/llvm-ir/not.ll
===================================================================
--- test/CodeGen/Mips/llvm-ir/not.ll
+++ test/CodeGen/Mips/llvm-ir/not.ll
@@ -209,25 +209,25 @@
 entry:
 ; ALL-LABEL: nor_i128:
 
-  ; GP32: lw $[[T1:[0-9]+]], 20($sp)
-  ; GP32: lw $[[T2:[0-9]+]], 16($sp)
+  ; GP32: lw $[[T1:[0-9]+]], 36($fp)
+  ; GP32: lw $[[T2:[0-9]+]], 32($fp)
   ; GP32: nor $2, $[[T2]], $4
   ; GP32: nor $3, $[[T1]], $5
-  ; GP32: lw $[[T0:[0-9]+]], 24($sp)
+  ; GP32: lw $[[T0:[0-9]+]], 40($fp)
   ; GP32: nor $4, $[[T0]], $6
-  ; GP32: lw $[[T3:[0-9]+]], 28($sp)
+  ; GP32: lw $[[T3:[0-9]+]], 44($fp)
   ; GP32: nor $5, $[[T3]], $7
 
   ; GP64: nor $2, $6, $4
   ; GP64: nor $3, $7, $5
 
-  ; MM32: lw $[[T1:[0-9]+]], 20($sp)
-  ; MM32: lw $[[T2:[0-9]+]], 16($sp)
+  ; MM32: lw $[[T1:[0-9]+]], 36($fp)
+  ; MM32: lw $[[T2:[0-9]+]], 32($fp)
   ; MM32: nor $2, $[[T2]], $4
   ; MM32: nor $3, $[[T1]], $5
-  ; MM32: lw $[[T0:[0-9]+]], 24($sp)
+  ; MM32: lw $[[T0:[0-9]+]], 40($fp)
   ; MM32: nor $4, $[[T0]], $6
-  ; MM32: lw $[[T3:[0-9]+]], 28($sp)
+  ; MM32: lw $[[T3:[0-9]+]], 44($fp)
   ; MM32: nor $5, $[[T3]], $7
 
   ; MM64: nor $2, $6, $4
Index: test/CodeGen/Mips/llvm-ir/or.ll
===================================================================
--- test/CodeGen/Mips/llvm-ir/or.ll
+++ test/CodeGen/Mips/llvm-ir/or.ll
@@ -107,25 +107,25 @@
 entry:
 ; ALL-LABEL: or_i128:
 
-  ; GP32: lw $[[T1:[0-9]+]], 20($sp)
-  ; GP32: lw $[[T2:[0-9]+]], 16($sp)
+  ; GP32: lw $[[T1:[0-9]+]], 36($fp)
+  ; GP32: lw $[[T2:[0-9]+]], 32($fp)
   ; GP32: or $2, $4, $[[T2]]
   ; GP32: or $3, $5, $[[T1]]
-  ; GP32: lw $[[T0:[0-9]+]], 24($sp)
+  ; GP32: lw $[[T0:[0-9]+]], 40($fp)
   ; GP32: or $4, $6, $[[T0]]
-  ; GP32: lw $[[T3:[0-9]+]], 28($sp)
+  ; GP32: lw $[[T3:[0-9]+]], 44($fp)
   ; GP32: or $5, $7, $[[T3]]
 
   ; GP64: or $2, $4, $6
   ; GP64: or $3, $5, $7
 
-  ; MM32: lw $[[T1:[0-9]+]], 20($sp)
-  ; MM32: lw $[[T2:[0-9]+]], 16($sp)
+  ; MM32: lw $[[T1:[0-9]+]], 36($fp)
+  ; MM32: lw $[[T2:[0-9]+]], 32($fp)
   ; MM32: or16 $[[T2]], $4
   ; MM32: or16 $[[T1]], $5
-  ; MM32: lw $[[T0:[0-9]+]], 24($sp)
+  ; MM32: lw $[[T0:[0-9]+]], 40($fp)
   ; MM32: or16 $[[T0]], $6
-  ; MM32: lw $[[T3:[0-9]+]], 28($sp)
+  ; MM32: lw $[[T3:[0-9]+]], 44($fp)
   ; MM32: or16 $[[T3]], $7
 
   ; MM64: or $2, $4, $6
Index: test/CodeGen/Mips/llvm-ir/sub.ll
===================================================================
--- test/CodeGen/Mips/llvm-ir/sub.ll
+++ test/CodeGen/Mips/llvm-ir/sub.ll
@@ -114,12 +114,12 @@
 entry:
 ; ALL-LABEL: sub_i128:
 
-  ; GP32-NOT-MM: lw $[[T0:[0-9]+]], 20($sp)
+  ; GP32-NOT-MM: lw $[[T0:[0-9]+]], 36($fp)
   ; GP32-NOT-MM: sltu $[[T1:[0-9]+]], $5, $[[T0]]
-  ; GP32-NOT-MM: lw $[[T2:[0-9]+]], 16($sp)
+  ; GP32-NOT-MM: lw $[[T2:[0-9]+]], 32($fp)
   ; GP32-NOT-MM: addu $[[T3:[0-9]+]], $[[T1]], $[[T2]]
-  ; GP32-NOT-MM: lw $[[T4:[0-9]+]], 24($sp)
-  ; GP32-NOT-MM: lw $[[T5:[0-9]+]], 28($sp)
+  ; GP32-NOT-MM: lw $[[T4:[0-9]+]], 40($fp)
+  ; GP32-NOT-MM: lw $[[T5:[0-9]+]], 44($fp)
   ; GP32-NOT-MM: subu $[[T6:[0-9]+]], $7, $[[T5]]
   ; GP32-NOT-MM: subu $2, $4, $[[T3]]
   ; GP32-NOT-MM: sltu $[[T8:[0-9]+]], $6, $[[T4]]
@@ -130,12 +130,12 @@
   ; GP32-NOT-MM: subu $4, $6, $[[T11]]
   ; GP32-NOT-MM: move $5, $[[T6]]
 
-  ; GP32-MM: lw $[[T0:[0-9]+]], 20($sp)
+  ; GP32-MM: lw $[[T0:[0-9]+]], 36($fp)
   ; GP32-MM: sltu $[[T1:[0-9]+]], $[[T2:[0-9]+]], $[[T0]]
-  ; GP32-MM: lw $[[T3:[0-9]+]], 16($sp)
+  ; GP32-MM: lw $[[T3:[0-9]+]], 32($fp)
   ; GP32-MM: addu $[[T3]], $[[T1]], $[[T3]]
-  ; GP32-MM: lw $[[T4:[0-9]+]], 24($sp)
-  ; GP32-MM: lw $[[T5:[0-9]+]], 28($sp)
+  ; GP32-MM: lw $[[T4:[0-9]+]], 40($fp)
+  ; GP32-MM: lw $[[T5:[0-9]+]], 44($fp)
   ; GP32-MM: subu $[[T1]], $7, $[[T5]]
   ; GP32-MM: subu $[[T3]], $[[T6:[0-9]+]], $[[T3]]
   ; GP32-MM: sltu $[[T6]], $6, $[[T4]]
Index: test/CodeGen/Mips/llvm-ir/xor.ll
===================================================================
--- test/CodeGen/Mips/llvm-ir/xor.ll
+++ test/CodeGen/Mips/llvm-ir/xor.ll
@@ -117,25 +117,25 @@
 entry:
 ; ALL-LABEL: xor_i128:
 
-  ; GP32: lw $[[T1:[0-9]+]], 20($sp)
-  ; GP32: lw $[[T2:[0-9]+]], 16($sp)
+  ; GP32: lw $[[T1:[0-9]+]], 36($fp)
+  ; GP32: lw $[[T2:[0-9]+]], 32($fp)
   ; GP32: xor $2, $4, $[[T2]]
   ; GP32: xor $3, $5, $[[T1]]
-  ; GP32: lw $[[T0:[0-9]+]], 24($sp)
+  ; GP32: lw $[[T0:[0-9]+]], 40($fp)
   ; GP32: xor $4, $6, $[[T0]]
-  ; GP32: lw $[[T3:[0-9]+]], 28($sp)
+  ; GP32: lw $[[T3:[0-9]+]], 44($fp)
   ; GP32: xor $5, $7, $[[T3]]
 
   ; GP64: xor $2, $4, $6
   ; GP64: xor $3, $5, $7
 
-  ; MM32: lw $[[T1:[0-9]+]], 20($sp)
-  ; MM32: lw $[[T2:[0-9]+]], 16($sp)
+  ; MM32: lw $[[T1:[0-9]+]], 36($fp)
+  ; MM32: lw $[[T2:[0-9]+]], 32($fp)
   ; MM32: xor16 $[[T2]], $4
   ; MM32: xor16 $[[T1]], $5
-  ; MM32: lw $[[T0:[0-9]+]], 24($sp)
+  ; MM32: lw $[[T0:[0-9]+]], 40($fp)
   ; MM32: xor16 $[[T0]], $6
-  ; MM32: lw $[[T3:[0-9]+]], 28($sp)
+  ; MM32: lw $[[T3:[0-9]+]], 44($fp)
   ; MM32: xor16 $[[T3]], $7
 
   ; MM64: xor $2, $4, $6
Index: test/CodeGen/X86/catchpad-dynamic-alloca.ll
===================================================================
--- test/CodeGen/X86/catchpad-dynamic-alloca.ll
+++ test/CodeGen/X86/catchpad-dynamic-alloca.ll
@@ -62,4 +62,4 @@
 ; CHECK-LABEL: $handlerMap$0$test2:
 ; CHECK: .long 0
 ; CHECK-NEXT: .long 0
-; CHECK-NEXT: .long 8
+; CHECK-NEXT: .long 16
Index: test/CodeGen/X86/libcall-sret.ll
===================================================================
--- test/CodeGen/X86/libcall-sret.ll
+++ test/CodeGen/X86/libcall-sret.ll
@@ -11,7 +11,7 @@
 
 ; Stack for call: 4(sret ptr), 16(i128 %l), 16(128 %r). So next logical
 ; (aligned) place for the actual sret data is %esp + 20.
-; CHECK: leal 20(%esp), [[SRET_ADDR:%[a-z]+]]
+; CHECK: leal 12(%esp), [[SRET_ADDR:%[a-z]+]]
 ; CHECK: pushl 72(%esp)
 ; CHECK: pushl 72(%esp)
 ; CHECK: pushl 72(%esp)
@@ -25,10 +25,10 @@
 ; CHECK: calll __multi3
 ; CHECK: addl $44, %esp
 
-; CHECK-DAG: movl 8(%esp), [[RES0:%[a-z]+]]
-; CHECK-DAG: movl 12(%esp), [[RES1:%[a-z]+]]
-; CHECK-DAG: movl 16(%esp), [[RES2:%[a-z]+]]
-; CHECK-DAG: movl 20(%esp), [[RES3:%[a-z]+]]
+; CHECK-DAG: movl (%esp), [[RES0:%[a-z]+]]
+; CHECK-DAG: movl 4(%esp), [[RES1:%[a-z]+]]
+; CHECK-DAG: movl 8(%esp), [[RES2:%[a-z]+]]
+; CHECK-DAG: movl 12(%esp), [[RES3:%[a-z]+]]
 ; CHECK-DAG: movl [[RES0]], var
 ; CHECK-DAG: movl [[RES1]], var+4
 ; CHECK-DAG: movl [[RES2]], var+8
Index: test/CodeGen/X86/mul-i1024.ll
===================================================================
--- test/CodeGen/X86/mul-i1024.ll
+++ test/CodeGen/X86/mul-i1024.ll
@@ -10,8 +10,8 @@
 ; X32-NEXT: pushl %ebx
 ; X32-NEXT: pushl %edi
 ; X32-NEXT: pushl %esi
-; X32-NEXT: andl $-8, %esp
-; X32-NEXT: subl $2496, %esp # imm = 0x9C0
+; X32-NEXT: andl $-16, %esp
+; X32-NEXT: subl $2512, %esp # imm = 0x9D0
 ; X32-NEXT: movl 12(%ebp), %edx
 ; X32-NEXT: movl 8(%ebp), %ecx
 ; X32-NEXT: movl 40(%ecx), %eax
@@ -21,7 +21,7 @@
 ; X32-NEXT: movl (%ecx), %eax
 ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: movl 4(%ecx), %eax
-; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: movl 32(%ecx), %eax
 ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: movl 36(%ecx), %eax
@@ -1920,7 +1920,7 @@
 ; X32-NEXT: adcl %ecx, %edx
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
 ; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %esi, (%esp) # 4-byte Spill
+; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx
 ; X32-NEXT: adcl $0, %ebx
 ; X32-NEXT: addl %eax, %edi
@@ -1933,14 +1933,14 @@
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
 ; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: adcl $0, %esi
-; X32-NEXT: addl (%esp), %edi # 4-byte Folded Reload
+; X32-NEXT: addl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload
 ; X32-NEXT: adcl %ebx, %esi
 ; X32-NEXT: movl $0, %eax
 ; X32-NEXT: adcl $0, %eax
 ; X32-NEXT: movl %eax, %ecx
 ; X32-NEXT: sbbl %eax, %eax
 ; X32-NEXT: andl $1, %eax
-; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: addl %eax, %edi
 ; X32-NEXT: movl %eax, %edx
@@ -1955,7 +1955,7 @@
 ; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT: adcl %eax, (%esp) # 4-byte Folded Spill
+; X32-NEXT: adcl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: movl %edx, %eax
@@ -1978,7 +1978,7 @@
 ; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload
 ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT: adcl (%esp), %ebx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload
 ; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
@@ -2031,7 +2031,7 @@
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload
 ; X32-NEXT: adcl %edx, %ebx
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: movl %esi, (%esp) # 4-byte Spill
+; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload
 ; X32-NEXT: adcl %esi, %edx
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -2102,16 +2102,16 @@
 ; X32-NEXT: addl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload
 ; X32-NEXT: adcl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl (%esp), %esi # 4-byte Reload
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
 ; X32-NEXT: adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, (%esp) # 4-byte Spill
+; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
 ; X32-NEXT: adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload
 ; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: addl %eax, %edx
 ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: adcl %ecx, %ebx
-; X32-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
 ; X32-NEXT: adcl %edi, %ecx
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload
 ; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload
@@ -2135,7 +2135,7 @@
 ; X32-NEXT: adcl $0, %ebx
 ; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill
+; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: adcl $0, %edx
 ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -2205,13 +2205,13 @@
 ; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload
 ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT: adcl (%esp), %esi # 4-byte Folded Reload
+; X32-NEXT: adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload
 ; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: adcl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload
 ; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: movl $0, %eax
 ; X32-NEXT: adcl $0, %eax
-; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: movl $0, %eax
 ; X32-NEXT: adcl $0, %eax
 ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
@@ -2264,7 +2264,7 @@
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
 ; X32-NEXT: adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload
 ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT: adcl (%esp), %edx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload
 ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload
@@ -2300,7 +2300,7 @@
 ; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: adcl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
 ; X32-NEXT: adcl $0, %edx
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -2351,9 +2351,9 @@
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
 ; X32-NEXT: adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload
 ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT: movl (%esp), %eax # 4-byte Reload
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
 ; X32-NEXT: adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill
 ; X32-NEXT: adcl $0, %edx
 ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
@@ -2415,7 +2415,7 @@
 ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload
 ; X32-NEXT: adcl %edx, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X32-NEXT: movl (%esp), %edx # 4-byte Reload
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload
 ; X32-NEXT: adcl %edx, {{[0-9]+}}(%esp) # 4-byte Folded Spill
 ; X32-NEXT: adcl $0, %eax
 ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
@@ -2545,7 +2545,7 @@
 ; X32-NEXT: adcl %eax, %edx
 ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill
+; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
 ; X32-NEXT: addl %ecx, %eax
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -2742,7 +2742,7 @@
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
 ; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: adcl %edi, %esi
-; X32-NEXT: movl (%esp), %edi # 4-byte Reload
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
 ; X32-NEXT: addl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
 ; X32-NEXT: adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload
@@ -2753,7 +2753,7 @@
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
 ; X32-NEXT: adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload
 ; X32-NEXT: addl %edx, %edi
-; X32-NEXT: movl %edi, (%esp) # 4-byte Spill
+; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload
 ; X32-NEXT: adcl %ecx, %edx
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
@@ -2770,14 +2770,14 @@
 ; X32-NEXT: adcl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
 ; X32-NEXT: adcl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X32-NEXT: movl (%esp), %eax # 4-byte Reload
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
 ; X32-NEXT: adcl $0, %eax
 ; X32-NEXT: movl %edx, %ecx
 ; X32-NEXT: adcl $0, %ecx
 ; X32-NEXT: adcl $0, %edi
 ; X32-NEXT: adcl $0, %esi
 ; X32-NEXT: addl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload
 ; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload
@@ -2827,9 +2827,9 @@
 ; X32-NEXT: adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload
 ; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl (%esp), %edx # 4-byte Reload
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload
 ; X32-NEXT: addl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
+; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
 ; X32-NEXT: adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
@@ -2843,9 +2843,9 @@
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload
 ; X32-NEXT: adcl %edx, {{[0-9]+}}(%esp) # 4-byte Folded Spill
 ; X32-NEXT: adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl (%esp), %edx # 4-byte Reload
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload
 ; X32-NEXT: addl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
+; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload
 ; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload
@@ -2889,7 +2889,7 @@
 ; X32-NEXT: adcl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
 ; X32-NEXT: adcl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X32-NEXT: movl (%esp), %eax # 4-byte Reload
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
 ; X32-NEXT: adcl $0, %eax
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
 ; X32-NEXT: adcl $0, %ecx
@@ -2904,7 +2904,7 @@
 ; X32-NEXT: adcl $0, %ebx
 ; X32-NEXT: adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill
 ; X32-NEXT: addl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload
 ; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload
@@ -3112,7 +3112,7 @@
 ; X32-NEXT: adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload
 ; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
-; X32-NEXT: addl %esi, (%esp) # 4-byte Folded Spill
+; X32-NEXT: addl %esi, {{[0-9]+}}(%esp) # 4-byte Folded Spill
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
 ; X32-NEXT: adcl %esi, {{[0-9]+}}(%esp) # 4-byte Folded Spill
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
@@ -3839,7 +3839,7 @@
 ; X32-NEXT: adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload
 ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
-; X32-NEXT: addl (%esp), %eax # 4-byte Folded Reload
+; X32-NEXT: addl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload
 ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
 ; X32-NEXT: adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload
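A note for reviewers on the churn in mul-i1024.ll (and mul-i512.ll below): it
follows mechanically from the new alignment. The prologue now realigns the
frame to 16 bytes (andl $-16 instead of andl $-8) and reserves 16 more bytes,
so the spill slot that used to sit at offset zero, printed as (%esp), moves to
a nonzero offset that the regenerated checks can only express with the
{{[0-9]+}}(%esp) pattern. A reduction along the following lines should
reproduce the prologue change; the function name is illustrative and not taken
from the test suite.

; Hypothetical reduction, not part of the patch. Compile with
; llc -mtriple=i386-unknown-linux-gnu. With i128:128 in the datalayout,
; integer types wider than 128 bits also round up to 16-byte alignment,
; so the stack temporaries of this multiply should force the realignment
; 'andl $-16, %esp' where the old layout used 'andl $-8, %esp'.
define void @mul_demo(i256* %a, i256* %b, i256* %out) {
entry:
  %av = load i256, i256* %a
  %bv = load i256, i256* %b
  %p = mul i256 %av, %bv
  store i256 %p, i256* %out
  ret void
}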
Index: test/CodeGen/X86/mul-i512.ll
===================================================================
--- test/CodeGen/X86/mul-i512.ll
+++ test/CodeGen/X86/mul-i512.ll
@@ -10,8 +10,8 @@
 ; X32-NEXT: pushl %ebx
 ; X32-NEXT: pushl %edi
 ; X32-NEXT: pushl %esi
-; X32-NEXT: andl $-8, %esp
-; X32-NEXT: subl $632, %esp # imm = 0x278
+; X32-NEXT: andl $-16, %esp
+; X32-NEXT: subl $640, %esp # imm = 0x280
 ; X32-NEXT: movl 12(%ebp), %ecx
 ; X32-NEXT: movl 8(%ebp), %ebx
 ; X32-NEXT: movl (%ebx), %edi
Index: test/CodeGen/X86/osx-private-labels.ll
===================================================================
--- test/CodeGen/X86/osx-private-labels.ll
+++ test/CodeGen/X86/osx-private-labels.ll
@@ -36,7 +36,7 @@
 
 @private6 = private unnamed_addr constant i128 42
 ; CHECK: .section __TEXT,__literal16,16byte_literals
-; CHECK-NEXT: .p2align 3
+; CHECK-NEXT: .p2align 4
 ; CHECK-NEXT: L_private6:
 
 %struct._objc_class = type { i8* }
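To see the datalayout change in isolation, here is a minimal sketch; the
datalayout string, triple, and function name below are illustrative
assumptions, not part of the patch. Without an i128 entry, DataLayout lets
i128 inherit the alignment of the widest integer type it knows about (i64,
preferred alignment 8 bytes on these targets); with the new component it gets
16 bytes. The same effect is what moves the Mips O32 tests above from
$sp-relative to $fp-relative argument loads: once the prologue realigns the
stack, incoming arguments can no longer be addressed at fixed offsets from
$sp.

; Illustrative sketch, not part of the patch. The i386 layout shown is the
; usual one with the new i128:128 component spliced in.
target datalayout = "e-m:e-p:32:32-i128:128-f64:32:64-f80:32-n8:16:32-S128"
target triple = "i386-unknown-linux-gnu"

define void @i128_align_demo(i128 %v) {
entry:
  ; With i128:128 this stack slot gets 16-byte alignment; under the old
  ; layout it would only have been 8-byte aligned.
  %slot = alloca i128
  store i128 %v, i128* %slot
  ret void
}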