Index: lib/Target/X86/X86InstrInfo.cpp =================================================================== --- lib/Target/X86/X86InstrInfo.cpp +++ lib/Target/X86/X86InstrInfo.cpp @@ -7068,6 +7068,10 @@ // 3. Other forms of the same operation (intrinsics and other variants) bool X86InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const { switch (Inst.getOpcode()) { + case X86::ADD8rr: + case X86::ADD16rr: + case X86::ADD32rr: + case X86::ADD64rr: case X86::AND8rr: case X86::AND16rr: case X86::AND32rr: Index: test/CodeGen/X86/DynamicCalleeSavedRegisters.ll =================================================================== --- test/CodeGen/X86/DynamicCalleeSavedRegisters.ll +++ test/CodeGen/X86/DynamicCalleeSavedRegisters.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=i386-linux-gnu | FileCheck %s declare x86_regcallcc i32 @callee(i32 %a0, i32 %b0, i32 %c0, i32 %d0, i32 %e0); @@ -5,16 +6,32 @@ ; In RegCall calling convention, ESI and EDI are callee saved registers. ; One might think that the caller could assume that ESI value is the same before ; and after calling the callee. -; However, RegCall also says that a register that was used for +; However, RegCall also says that a register that was used for ; passing/returning argumnets, can be assumed to be modified by the callee. ; In other words, it is no longer a callee saved register. ; In this case we want to see that EDX/ECX values are saved and EDI/ESI are assumed ; to be modified by the callee. ; This is a hipe CC function that doesn't save any register for the caller. ; So we can be sure that there is no other reason to save EDX/ECX. -; The caller arguments are expected to be passed (in the following order) +; The caller arguments are expected to be passed (in the following order) ; in registers: ESI, EBP, EAX, EDX and ECX. define cc 11 i32 @caller(i32 %a0, i32 %b0, i32 %c0, i32 %d0, i32 %e0) nounwind { +; CHECK-LABEL: caller: +; CHECK: # %bb.0: +; CHECK-NEXT: subl $12, %esp +; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl %edx, %ebx +; CHECK-NEXT: movl %eax, %edx +; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: movl %ebp, %ecx +; CHECK-NEXT: movl %ebx, %edi +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; CHECK-NEXT: movl %ebp, %esi +; CHECK-NEXT: calll callee +; CHECK-NEXT: addl %ebp, %ebx +; CHECK-NEXT: leal (%ebx,%eax), %esi +; CHECK-NEXT: addl $12, %esp +; CHECK-NEXT: retl %b1 = call x86_regcallcc i32 @callee(i32 %a0, i32 %b0, i32 %c0, i32 %d0, i32 %e0) %b2 = add i32 %b1, %d0 %b3 = add i32 %b2, %e0 @@ -42,10 +59,23 @@ !2 = !{ !"AMD64_LEAF_WORDS", i32 18 } ; Make sure that the callee doesn't save parameters that were passed as arguments. -; The caller arguments are expected to be passed (in the following order) +; The caller arguments are expected to be passed (in the following order) ; in registers: EAX, ECX, EDX, EDI and ESI. ; The result will return in EAX, ECX and EDX. 
define x86_regcallcc {i32, i32, i32} @test_callee(i32 %a0, i32 %b0, i32 %c0, i32 %d0, i32 %e0) nounwind { +; CHECK-LABEL: test_callee: +; CHECK: # %bb.0: +; CHECK-NEXT: leal (,%esi,8), %ecx +; CHECK-NEXT: subl %esi, %ecx +; CHECK-NEXT: movl $5, %eax +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: divl %esi +; CHECK-NEXT: movl %eax, %esi +; CHECK-NEXT: leal (,%edi,8), %edx +; CHECK-NEXT: subl %edi, %edx +; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: movl %esi, %ecx +; CHECK-NEXT: retl %b1 = mul i32 7, %e0 %b2 = udiv i32 5, %e0 %b3 = mul i32 7, %d0 Index: test/CodeGen/X86/alias-static-alloca.ll =================================================================== --- test/CodeGen/X86/alias-static-alloca.ll +++ test/CodeGen/X86/alias-static-alloca.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -o - -mtriple=x86_64-linux-gnu %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -17,6 +18,18 @@ ; CHECK: retq define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d) { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: movl %esi, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movl %edi, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movl %edx, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: addl %edx, %esi +; CHECK-NEXT: addl %ecx, %esi +; CHECK-NEXT: leal (%rsi,%rdi), %eax +; CHECK-NEXT: retq entry: %a0 = alloca i32 %a1 = alloca i32 Index: test/CodeGen/X86/avx512-intrinsics-x86_64.ll =================================================================== --- test/CodeGen/X86/avx512-intrinsics-x86_64.ll +++ test/CodeGen/X86/avx512-intrinsics-x86_64.ll @@ -117,10 +117,10 @@ define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) { ; CHECK-LABEL: test_x86_avx512_cvtsd2usi64: ; CHECK: ## %bb.0: -; CHECK-NEXT: vcvtsd2usi %xmm0, %rax -; CHECK-NEXT: vcvtsd2usi {rz-sae}, %xmm0, %rcx -; CHECK-NEXT: addq %rax, %rcx +; CHECK-NEXT: vcvtsd2usi %xmm0, %rcx +; CHECK-NEXT: vcvtsd2usi {rz-sae}, %xmm0, %rdx ; CHECK-NEXT: vcvtsd2usi {rd-sae}, %xmm0, %rax +; CHECK-NEXT: addq %rdx, %rax ; CHECK-NEXT: addq %rcx, %rax ; CHECK-NEXT: retq @@ -136,10 +136,10 @@ define i64 @test_x86_avx512_cvtsd2si64(<2 x double> %a0) { ; CHECK-LABEL: test_x86_avx512_cvtsd2si64: ; CHECK: ## %bb.0: -; CHECK-NEXT: vcvtsd2si %xmm0, %rax -; CHECK-NEXT: vcvtsd2si {rz-sae}, %xmm0, %rcx -; CHECK-NEXT: addq %rax, %rcx +; CHECK-NEXT: vcvtsd2si %xmm0, %rcx +; CHECK-NEXT: vcvtsd2si {rz-sae}, %xmm0, %rdx ; CHECK-NEXT: vcvtsd2si {rd-sae}, %xmm0, %rax +; CHECK-NEXT: addq %rdx, %rax ; CHECK-NEXT: addq %rcx, %rax ; CHECK-NEXT: retq @@ -155,10 +155,10 @@ define i64 @test_x86_avx512_cvtss2usi64(<4 x float> %a0) { ; CHECK-LABEL: test_x86_avx512_cvtss2usi64: ; CHECK: ## %bb.0: -; CHECK-NEXT: vcvtss2usi %xmm0, %rax -; CHECK-NEXT: vcvtss2usi {rz-sae}, %xmm0, %rcx -; CHECK-NEXT: addq %rax, %rcx +; CHECK-NEXT: vcvtss2usi %xmm0, %rcx +; CHECK-NEXT: vcvtss2usi {rz-sae}, %xmm0, %rdx ; CHECK-NEXT: vcvtss2usi {rd-sae}, %xmm0, %rax +; CHECK-NEXT: addq %rdx, %rax ; CHECK-NEXT: addq %rcx, %rax ; CHECK-NEXT: retq @@ -174,10 +174,10 @@ define i64 @test_x86_avx512_cvtss2si64(<4 x float> %a0) { ; CHECK-LABEL: test_x86_avx512_cvtss2si64: ; CHECK: ## %bb.0: -; CHECK-NEXT: vcvtss2si %xmm0, %rax -; CHECK-NEXT: vcvtss2si {rz-sae}, %xmm0, %rcx -; CHECK-NEXT: addq %rax, %rcx +; CHECK-NEXT: vcvtss2si %xmm0, %rcx +; CHECK-NEXT: vcvtss2si {rz-sae}, %xmm0, %rdx ; CHECK-NEXT: vcvtss2si {rd-sae}, 
%xmm0, %rax +; CHECK-NEXT: addq %rdx, %rax ; CHECK-NEXT: addq %rcx, %rax ; CHECK-NEXT: retq Index: test/CodeGen/X86/avx512-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512-intrinsics.ll +++ test/CodeGen/X86/avx512-intrinsics.ll @@ -934,10 +934,10 @@ define i32 @test_x86_avx512_cvtsd2usi32(<2 x double> %a0) { ; CHECK-LABEL: test_x86_avx512_cvtsd2usi32: ; CHECK: # %bb.0: -; CHECK-NEXT: vcvtsd2usi %xmm0, %eax -; CHECK-NEXT: vcvtsd2usi {rz-sae}, %xmm0, %ecx -; CHECK-NEXT: addl %eax, %ecx +; CHECK-NEXT: vcvtsd2usi %xmm0, %ecx +; CHECK-NEXT: vcvtsd2usi {rz-sae}, %xmm0, %edx ; CHECK-NEXT: vcvtsd2usi {rd-sae}, %xmm0, %eax +; CHECK-NEXT: addl %edx, %eax ; CHECK-NEXT: addl %ecx, %eax ; CHECK-NEXT: ret{{[l|q]}} @@ -953,10 +953,10 @@ define i32 @test_x86_avx512_cvtsd2si32(<2 x double> %a0) { ; CHECK-LABEL: test_x86_avx512_cvtsd2si32: ; CHECK: # %bb.0: -; CHECK-NEXT: vcvtsd2si %xmm0, %eax -; CHECK-NEXT: vcvtsd2si {rz-sae}, %xmm0, %ecx -; CHECK-NEXT: addl %eax, %ecx +; CHECK-NEXT: vcvtsd2si %xmm0, %ecx +; CHECK-NEXT: vcvtsd2si {rz-sae}, %xmm0, %edx ; CHECK-NEXT: vcvtsd2si {rd-sae}, %xmm0, %eax +; CHECK-NEXT: addl %edx, %eax ; CHECK-NEXT: addl %ecx, %eax ; CHECK-NEXT: ret{{[l|q]}} @@ -972,10 +972,10 @@ define i32 @test_x86_avx512_cvtss2usi32(<4 x float> %a0) { ; CHECK-LABEL: test_x86_avx512_cvtss2usi32: ; CHECK: # %bb.0: -; CHECK-NEXT: vcvtss2usi %xmm0, %eax -; CHECK-NEXT: vcvtss2usi {rz-sae}, %xmm0, %ecx -; CHECK-NEXT: addl %eax, %ecx +; CHECK-NEXT: vcvtss2usi %xmm0, %ecx +; CHECK-NEXT: vcvtss2usi {rz-sae}, %xmm0, %edx ; CHECK-NEXT: vcvtss2usi {rd-sae}, %xmm0, %eax +; CHECK-NEXT: addl %edx, %eax ; CHECK-NEXT: addl %ecx, %eax ; CHECK-NEXT: ret{{[l|q]}} @@ -991,10 +991,10 @@ define i32 @test_x86_avx512_cvtss2si32(<4 x float> %a0) { ; CHECK-LABEL: test_x86_avx512_cvtss2si32: ; CHECK: # %bb.0: -; CHECK-NEXT: vcvtss2si %xmm0, %eax -; CHECK-NEXT: vcvtss2si {rz-sae}, %xmm0, %ecx -; CHECK-NEXT: addl %eax, %ecx +; CHECK-NEXT: vcvtss2si %xmm0, %ecx +; CHECK-NEXT: vcvtss2si {rz-sae}, %xmm0, %edx ; CHECK-NEXT: vcvtss2si {rd-sae}, %xmm0, %eax +; CHECK-NEXT: addl %edx, %eax ; CHECK-NEXT: addl %ecx, %eax ; CHECK-NEXT: ret{{[l|q]}} Index: test/CodeGen/X86/avx512-regcall-Mask.ll =================================================================== --- test/CodeGen/X86/avx512-regcall-Mask.ll +++ test/CodeGen/X86/avx512-regcall-Mask.ll @@ -35,32 +35,32 @@ ; ; WIN64-LABEL: test_argv64i1: ; WIN64: # %bb.0: +; WIN64-NEXT: addq %rdx, %rcx +; WIN64-NEXT: addq %rdi, %rcx +; WIN64-NEXT: addq %rsi, %rcx +; WIN64-NEXT: addq %r8, %rcx +; WIN64-NEXT: addq %r9, %rcx +; WIN64-NEXT: addq %r10, %rcx +; WIN64-NEXT: addq %r11, %rcx +; WIN64-NEXT: addq %r12, %rcx +; WIN64-NEXT: addq %r14, %rcx +; WIN64-NEXT: addq %r15, %rcx ; WIN64-NEXT: addq %rcx, %rax -; WIN64-NEXT: addq %rdx, %rax -; WIN64-NEXT: addq %rdi, %rax -; WIN64-NEXT: addq %rsi, %rax -; WIN64-NEXT: addq %r8, %rax -; WIN64-NEXT: addq %r9, %rax -; WIN64-NEXT: addq %r10, %rax -; WIN64-NEXT: addq %r11, %rax -; WIN64-NEXT: addq %r12, %rax -; WIN64-NEXT: addq %r14, %rax -; WIN64-NEXT: addq %r15, %rax ; WIN64-NEXT: addq {{[0-9]+}}(%rsp), %rax ; WIN64-NEXT: retq ; ; LINUXOSX64-LABEL: test_argv64i1: ; LINUXOSX64: # %bb.0: +; LINUXOSX64-NEXT: addq %rdx, %rcx +; LINUXOSX64-NEXT: addq %rdi, %rcx +; LINUXOSX64-NEXT: addq %rsi, %rcx +; LINUXOSX64-NEXT: addq %r8, %rcx +; LINUXOSX64-NEXT: addq %r9, %rcx +; LINUXOSX64-NEXT: addq %r12, %rcx +; LINUXOSX64-NEXT: addq %r13, %rcx +; LINUXOSX64-NEXT: addq %r14, %rcx +; LINUXOSX64-NEXT: addq 
%r15, %rcx ; LINUXOSX64-NEXT: addq %rcx, %rax -; LINUXOSX64-NEXT: addq %rdx, %rax -; LINUXOSX64-NEXT: addq %rdi, %rax -; LINUXOSX64-NEXT: addq %rsi, %rax -; LINUXOSX64-NEXT: addq %r8, %rax -; LINUXOSX64-NEXT: addq %r9, %rax -; LINUXOSX64-NEXT: addq %r12, %rax -; LINUXOSX64-NEXT: addq %r13, %rax -; LINUXOSX64-NEXT: addq %r14, %rax -; LINUXOSX64-NEXT: addq %r15, %rax ; LINUXOSX64-NEXT: addq {{[0-9]+}}(%rsp), %rax ; LINUXOSX64-NEXT: addq {{[0-9]+}}(%rsp), %rax ; LINUXOSX64-NEXT: retq Index: test/CodeGen/X86/avx512-regcall-NoMask.ll =================================================================== --- test/CodeGen/X86/avx512-regcall-NoMask.ll +++ test/CodeGen/X86/avx512-regcall-NoMask.ll @@ -971,44 +971,45 @@ ; X32-NEXT: subl $20, %esp ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, (%esp) # 4-byte Spill ; X32-NEXT: subl %ecx, %ebx -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: movl %esi, %ebp +; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X32-NEXT: subl {{[0-9]+}}(%esp), %ebp ; X32-NEXT: imull %ebp, %ebx ; X32-NEXT: movl %edx, %ebp ; X32-NEXT: subl %edi, %ebp -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: subl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: imull %ebp, %ecx -; X32-NEXT: addl %ecx, %ebx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %ebp -; X32-NEXT: subl {{[0-9]+}}(%esp), %ebp -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: subl {{[0-9]+}}(%esp), %eax -; X32-NEXT: imull %ebp, %eax -; X32-NEXT: addl %eax, %ebx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT: movl (%esp), %ebp # 4-byte Reload -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NEXT: addl {{[0-9]+}}(%esp), %edi -; X32-NEXT: addl {{[0-9]+}}(%esp), %esi -; X32-NEXT: imull %eax, %esi -; X32-NEXT: addl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi +; X32-NEXT: movl %edi, %edx +; X32-NEXT: subl {{[0-9]+}}(%esp), %edx ; X32-NEXT: imull %ebp, %edx -; X32-NEXT: addl %esi, %edx -; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: imull %edi, %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %esi +; X32-NEXT: subl {{[0-9]+}}(%esp), %esi +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: subl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: imull %esi, %ecx ; X32-NEXT: addl %edx, %ecx -; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT: addl %edx, (%esp) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %ebp, %edx +; X32-NEXT: addl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X32-NEXT: addl {{[0-9]+}}(%esp), %ebp +; X32-NEXT: imull (%esp), %ebp # 4-byte Folded Reload +; X32-NEXT: addl {{[0-9]+}}(%esp), %edi +; X32-NEXT: imull %esi, %edi +; X32-NEXT: addl {{[0-9]+}}(%esp), %eax +; X32-NEXT: imull %edx, %eax +; X32-NEXT: addl %edi, %eax +; X32-NEXT: addl 
%ebp, %eax +; X32-NEXT: addl %ecx, %eax +; X32-NEXT: addl %eax, %ebx ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: addl $20, %esp ; X32-NEXT: popl %ebx @@ -1020,35 +1021,41 @@ ; WIN64-NEXT: pushq %r13 ; WIN64-NEXT: pushq %rbp ; WIN64-NEXT: pushq %rbx +; WIN64-NEXT: subq $4, %rsp +; WIN64-NEXT: movl %ecx, %ebx +; WIN64-NEXT: movl %ecx, (%rsp) # 4-byte Spill +; WIN64-NEXT: movl %eax, %ecx ; WIN64-NEXT: movl %eax, %r13d -; WIN64-NEXT: subl %ecx, %eax +; WIN64-NEXT: subl %ebx, %r13d ; WIN64-NEXT: movl %edx, %ebp ; WIN64-NEXT: subl %edi, %ebp ; WIN64-NEXT: movl %r9d, %ebx ; WIN64-NEXT: subl %r10d, %ebx -; WIN64-NEXT: imull %ebx, %eax +; WIN64-NEXT: imull %ebx, %r13d ; WIN64-NEXT: movl %r11d, %ebx ; WIN64-NEXT: subl %r12d, %ebx ; WIN64-NEXT: imull %ebp, %ebx ; WIN64-NEXT: movl %esi, %ebp ; WIN64-NEXT: subl %r8d, %ebp +; WIN64-NEXT: movl %r14d, %eax +; WIN64-NEXT: subl %r15d, %eax +; WIN64-NEXT: imull %ebp, %eax ; WIN64-NEXT: addl %ebx, %eax -; WIN64-NEXT: movl %r14d, %ebx -; WIN64-NEXT: subl %r15d, %ebx -; WIN64-NEXT: imull %ebp, %ebx -; WIN64-NEXT: addl %ebx, %eax -; WIN64-NEXT: addl %ecx, %r13d +; WIN64-NEXT: addl (%rsp), %ecx # 4-byte Folded Reload ; WIN64-NEXT: addl %edi, %edx ; WIN64-NEXT: addl %r8d, %esi ; WIN64-NEXT: addl %r10d, %r9d -; WIN64-NEXT: imull %r13d, %r9d +; WIN64-NEXT: imull %ecx, %r9d ; WIN64-NEXT: addl %r12d, %r11d ; WIN64-NEXT: imull %edx, %r11d -; WIN64-NEXT: addl %r9d, %r11d ; WIN64-NEXT: addl %r15d, %r14d ; WIN64-NEXT: imull %esi, %r14d ; WIN64-NEXT: addl %r11d, %r14d -; WIN64-NEXT: addl %r14d, %eax +; WIN64-NEXT: addl %r9d, %r14d +; WIN64-NEXT: addl %eax, %r14d +; WIN64-NEXT: addl %r14d, %r13d +; WIN64-NEXT: movl %r13d, %eax +; WIN64-NEXT: addq $4, %rsp ; WIN64-NEXT: popq %rbx ; WIN64-NEXT: popq %rbp ; WIN64-NEXT: popq %r13 @@ -1058,36 +1065,38 @@ ; LINUXOSX64: # %bb.0: ; LINUXOSX64-NEXT: pushq %rbp ; LINUXOSX64-NEXT: pushq %rbx +; LINUXOSX64-NEXT: movl %ecx, %r11d +; LINUXOSX64-NEXT: movl %eax, %ecx ; LINUXOSX64-NEXT: movl %eax, %r10d -; LINUXOSX64-NEXT: movl {{[0-9]+}}(%rsp), %r11d -; LINUXOSX64-NEXT: subl %ecx, %eax +; LINUXOSX64-NEXT: subl %r11d, %r10d ; LINUXOSX64-NEXT: movl %edx, %ebx ; LINUXOSX64-NEXT: subl %edi, %ebx ; LINUXOSX64-NEXT: movl %r9d, %ebp ; LINUXOSX64-NEXT: subl %r12d, %ebp -; LINUXOSX64-NEXT: imull %ebp, %eax +; LINUXOSX64-NEXT: imull %ebp, %r10d ; LINUXOSX64-NEXT: movl %r13d, %ebp ; LINUXOSX64-NEXT: subl %r14d, %ebp ; LINUXOSX64-NEXT: imull %ebx, %ebp ; LINUXOSX64-NEXT: movl %esi, %ebx ; LINUXOSX64-NEXT: subl %r8d, %ebx +; LINUXOSX64-NEXT: movl %r15d, %eax +; LINUXOSX64-NEXT: subl {{[0-9]+}}(%rsp), %eax +; LINUXOSX64-NEXT: imull %ebx, %eax ; LINUXOSX64-NEXT: addl %ebp, %eax -; LINUXOSX64-NEXT: movl %r15d, %ebp -; LINUXOSX64-NEXT: subl %r11d, %ebp -; LINUXOSX64-NEXT: imull %ebx, %ebp -; LINUXOSX64-NEXT: addl %ebp, %eax -; LINUXOSX64-NEXT: addl %ecx, %r10d +; LINUXOSX64-NEXT: addl %r11d, %ecx ; LINUXOSX64-NEXT: addl %edi, %edx ; LINUXOSX64-NEXT: addl %r8d, %esi ; LINUXOSX64-NEXT: addl %r12d, %r9d -; LINUXOSX64-NEXT: imull %r10d, %r9d +; LINUXOSX64-NEXT: imull %ecx, %r9d ; LINUXOSX64-NEXT: addl %r14d, %r13d ; LINUXOSX64-NEXT: imull %edx, %r13d -; LINUXOSX64-NEXT: addl %r9d, %r13d -; LINUXOSX64-NEXT: addl %r11d, %r15d +; LINUXOSX64-NEXT: addl {{[0-9]+}}(%rsp), %r15d ; LINUXOSX64-NEXT: imull %esi, %r15d ; LINUXOSX64-NEXT: addl %r13d, %r15d -; LINUXOSX64-NEXT: addl %r15d, %eax +; LINUXOSX64-NEXT: addl %r9d, %r15d +; LINUXOSX64-NEXT: addl %eax, %r15d +; LINUXOSX64-NEXT: addl %r15d, %r10d +; LINUXOSX64-NEXT: movl %r10d, %eax ; LINUXOSX64-NEXT: popq %rbx ; 
LINUXOSX64-NEXT: popq %rbp ; LINUXOSX64-NEXT: retq Index: test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll =================================================================== --- test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll +++ test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll @@ -1732,19 +1732,19 @@ ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] ; X64-NEXT: vpcmpgtb %zmm0, %zmm1, %k0 # encoding: [0x62,0xf1,0x75,0x48,0x64,0xc0] ; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8] -; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1] ; X64-NEXT: vpcmpleb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x02] -; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] -; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8] +; X64-NEXT: kmovq %k0, %rdx # encoding: [0xc4,0xe1,0xfb,0x93,0xd0] +; X64-NEXT: addq %rcx, %rdx # encoding: [0x48,0x01,0xca] ; X64-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x04] ; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8] -; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1] +; X64-NEXT: addq %rdx, %rcx # encoding: [0x48,0x01,0xd1] ; X64-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x05] -; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] -; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8] +; X64-NEXT: kmovq %k0, %rdx # encoding: [0xc4,0xe1,0xfb,0x93,0xd0] +; X64-NEXT: addq %rcx, %rdx # encoding: [0x48,0x01,0xca] +; X64-NEXT: addq %rax, %rdx # encoding: [0x48,0x01,0xc2] ; X64-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xc1] -; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8] -; X64-NEXT: leaq -1(%rcx,%rax), %rax # encoding: [0x48,0x8d,0x44,0x01,0xff] +; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] +; X64-NEXT: leaq -1(%rax,%rdx), %rax # encoding: [0x48,0x8d,0x44,0x10,0xff] ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1) @@ -1847,23 +1847,23 @@ ; X64: # %bb.0: ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] ; X64-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x74,0xc1] -; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] -; X64-NEXT: vpcmpgtb %zmm0, %zmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x64,0xc0] ; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8] -; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1] -; X64-NEXT: vpcmpleb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x02] +; X64-NEXT: vpcmpgtb %zmm0, %zmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x64,0xc0] ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] -; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8] +; X64-NEXT: vpcmpleb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x02] +; X64-NEXT: kmovq %k0, %rdx # encoding: [0xc4,0xe1,0xfb,0x93,0xd0] +; X64-NEXT: addq %rax, %rdx # encoding: [0x48,0x01,0xc2] ; X64-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x04] -; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8] -; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1] +; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] +; X64-NEXT: addq %rdx, %rax # encoding: [0x48,0x01,0xd0] ; X64-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 {%k1} # 
encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x05] ; X64-NEXT: kmovq %k0, %rdx # encoding: [0xc4,0xe1,0xfb,0x93,0xd0] -; X64-NEXT: addq %rcx, %rdx # encoding: [0x48,0x01,0xca] +; X64-NEXT: addq %rax, %rdx # encoding: [0x48,0x01,0xc2] ; X64-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x64,0xc1] ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] ; X64-NEXT: addq %rdx, %rax # encoding: [0x48,0x01,0xd0] ; X64-NEXT: addq %rdi, %rax # encoding: [0x48,0x01,0xf8] +; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8] ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask) @@ -1944,19 +1944,19 @@ ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] ; X64-NEXT: vpcmpltub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x01] ; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8] -; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1] ; X64-NEXT: vpcmpleub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x02] -; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] -; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8] +; X64-NEXT: kmovq %k0, %rdx # encoding: [0xc4,0xe1,0xfb,0x93,0xd0] +; X64-NEXT: addq %rcx, %rdx # encoding: [0x48,0x01,0xca] ; X64-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x04] ; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8] -; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1] +; X64-NEXT: addq %rdx, %rcx # encoding: [0x48,0x01,0xd1] ; X64-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x05] -; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] -; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8] +; X64-NEXT: kmovq %k0, %rdx # encoding: [0xc4,0xe1,0xfb,0x93,0xd0] +; X64-NEXT: addq %rcx, %rdx # encoding: [0x48,0x01,0xca] +; X64-NEXT: addq %rax, %rdx # encoding: [0x48,0x01,0xc2] ; X64-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x06] -; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8] -; X64-NEXT: leaq -1(%rcx,%rax), %rax # encoding: [0x48,0x8d,0x44,0x01,0xff] +; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] +; X64-NEXT: leaq -1(%rax,%rdx), %rax # encoding: [0x48,0x8d,0x44,0x10,0xff] ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1) @@ -2059,23 +2059,23 @@ ; X64: # %bb.0: ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] ; X64-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x74,0xc1] -; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] -; X64-NEXT: vpcmpltub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x01] ; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8] -; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1] -; X64-NEXT: vpcmpleub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x02] +; X64-NEXT: vpcmpltub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x01] ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] -; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8] +; X64-NEXT: vpcmpleub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x02] +; X64-NEXT: kmovq %k0, %rdx # 
encoding: [0xc4,0xe1,0xfb,0x93,0xd0] +; X64-NEXT: addq %rax, %rdx # encoding: [0x48,0x01,0xc2] ; X64-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x04] -; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8] -; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1] +; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] +; X64-NEXT: addq %rdx, %rax # encoding: [0x48,0x01,0xd0] ; X64-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x05] ; X64-NEXT: kmovq %k0, %rdx # encoding: [0xc4,0xe1,0xfb,0x93,0xd0] -; X64-NEXT: addq %rcx, %rdx # encoding: [0x48,0x01,0xca] +; X64-NEXT: addq %rax, %rdx # encoding: [0x48,0x01,0xc2] ; X64-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x06] ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] ; X64-NEXT: addq %rdx, %rax # encoding: [0x48,0x01,0xd0] ; X64-NEXT: addq %rdi, %rax # encoding: [0x48,0x01,0xf8] +; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8] ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask) @@ -2105,19 +2105,19 @@ ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] ; X86-NEXT: vpcmpgtw %zmm0, %zmm1, %k0 # encoding: [0x62,0xf1,0x75,0x48,0x65,0xc0] ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] -; X86-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] ; X86-NEXT: vpcmplew %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x02] -; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] -; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] +; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] +; X86-NEXT: addl %ecx, %edx # encoding: [0x01,0xca] ; X86-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x04] ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] -; X86-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] +; X86-NEXT: addl %edx, %ecx # encoding: [0x01,0xd1] ; X86-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x05] -; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] -; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] +; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] +; X86-NEXT: addl %ecx, %edx # encoding: [0x01,0xca] +; X86-NEXT: addl %eax, %edx # encoding: [0x01,0xc2] ; X86-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x65,0xc1] -; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] -; X86-NEXT: leal -1(%ecx,%eax), %eax # encoding: [0x8d,0x44,0x01,0xff] +; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] +; X86-NEXT: leal -1(%eax,%edx), %eax # encoding: [0x8d,0x44,0x10,0xff] ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X86-NEXT: retl # encoding: [0xc3] ; @@ -2127,19 +2127,19 @@ ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] ; X64-NEXT: vpcmpgtw %zmm0, %zmm1, %k0 # encoding: [0x62,0xf1,0x75,0x48,0x65,0xc0] ; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] -; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] ; X64-NEXT: vpcmplew %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x02] -; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] -; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] +; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] +; X64-NEXT: addl %ecx, %edx # encoding: [0x01,0xca] ; X64-NEXT: vpcmpneqw %zmm1, 
%zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x04] ; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] -; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] +; X64-NEXT: addl %edx, %ecx # encoding: [0x01,0xd1] ; X64-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x05] -; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] -; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] +; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] +; X64-NEXT: addl %ecx, %edx # encoding: [0x01,0xca] +; X64-NEXT: addl %eax, %edx # encoding: [0x01,0xc2] ; X64-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x65,0xc1] -; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] -; X64-NEXT: leal -1(%rcx,%rax), %eax # encoding: [0x8d,0x44,0x01,0xff] +; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] +; X64-NEXT: leal -1(%rax,%rdx), %eax # encoding: [0x8d,0x44,0x10,0xff] ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1) @@ -2169,23 +2169,23 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x08] ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] ; X86-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x75,0xc1] -; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] -; X86-NEXT: vpcmpgtw %zmm0, %zmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x65,0xc0] ; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] -; X86-NEXT: addl %eax, %edx # encoding: [0x01,0xc2] -; X86-NEXT: vpcmplew %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x02] +; X86-NEXT: vpcmpgtw %zmm0, %zmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x65,0xc0] ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] -; X86-NEXT: addl %edx, %eax # encoding: [0x01,0xd0] +; X86-NEXT: vpcmplew %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x02] +; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] +; X86-NEXT: addl %eax, %esi # encoding: [0x01,0xc6] ; X86-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x04] -; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] -; X86-NEXT: addl %eax, %edx # encoding: [0x01,0xc2] +; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] +; X86-NEXT: addl %esi, %eax # encoding: [0x01,0xf0] ; X86-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x05] ; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] -; X86-NEXT: addl %edx, %esi # encoding: [0x01,0xd6] +; X86-NEXT: addl %eax, %esi # encoding: [0x01,0xc6] ; X86-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x65,0xc1] ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] ; X86-NEXT: addl %esi, %eax # encoding: [0x01,0xf0] ; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] +; X86-NEXT: addl %edx, %eax # encoding: [0x01,0xd0] ; X86-NEXT: popl %esi # encoding: [0x5e] ; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] @@ -2195,23 +2195,23 @@ ; X64: # %bb.0: ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] ; X64-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x75,0xc1] -; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] -; X64-NEXT: vpcmpgtw %zmm0, %zmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x65,0xc0] ; X64-NEXT: kmovd 
%k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] -; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] -; X64-NEXT: vpcmplew %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x02] +; X64-NEXT: vpcmpgtw %zmm0, %zmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x65,0xc0] ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] -; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] +; X64-NEXT: vpcmplew %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x02] +; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] +; X64-NEXT: addl %eax, %edx # encoding: [0x01,0xc2] ; X64-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x04] -; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] -; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] +; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] +; X64-NEXT: addl %edx, %eax # encoding: [0x01,0xd0] ; X64-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x05] ; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] -; X64-NEXT: addl %ecx, %edx # encoding: [0x01,0xca] +; X64-NEXT: addl %eax, %edx # encoding: [0x01,0xc2] ; X64-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x65,0xc1] ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] ; X64-NEXT: addl %edx, %eax # encoding: [0x01,0xd0] ; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8] +; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask) @@ -2241,19 +2241,19 @@ ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] ; X86-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x01] ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] -; X86-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] ; X86-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x02] -; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] -; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] +; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] +; X86-NEXT: addl %ecx, %edx # encoding: [0x01,0xca] ; X86-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x04] ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] -; X86-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] +; X86-NEXT: addl %edx, %ecx # encoding: [0x01,0xd1] ; X86-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x05] -; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] -; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] +; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] +; X86-NEXT: addl %ecx, %edx # encoding: [0x01,0xca] +; X86-NEXT: addl %eax, %edx # encoding: [0x01,0xc2] ; X86-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x06] -; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] -; X86-NEXT: leal -1(%ecx,%eax), %eax # encoding: [0x8d,0x44,0x01,0xff] +; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] +; X86-NEXT: leal -1(%eax,%edx), %eax # encoding: [0x8d,0x44,0x10,0xff] ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X86-NEXT: retl # encoding: [0xc3] ; @@ -2263,19 +2263,19 @@ ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] ; X64-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 # encoding: 
[0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x01] ; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] -; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] ; X64-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x02] -; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] -; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] +; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] +; X64-NEXT: addl %ecx, %edx # encoding: [0x01,0xca] ; X64-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x04] ; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] -; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] +; X64-NEXT: addl %edx, %ecx # encoding: [0x01,0xd1] ; X64-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x05] -; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] -; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] +; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] +; X64-NEXT: addl %ecx, %edx # encoding: [0x01,0xca] +; X64-NEXT: addl %eax, %edx # encoding: [0x01,0xc2] ; X64-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x06] -; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] -; X64-NEXT: leal -1(%rcx,%rax), %eax # encoding: [0x8d,0x44,0x01,0xff] +; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] +; X64-NEXT: leal -1(%rax,%rdx), %eax # encoding: [0x8d,0x44,0x10,0xff] ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1) @@ -2305,23 +2305,23 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x08] ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] ; X86-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x75,0xc1] -; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] -; X86-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x01] ; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] -; X86-NEXT: addl %eax, %edx # encoding: [0x01,0xc2] -; X86-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x02] +; X86-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x01] ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] -; X86-NEXT: addl %edx, %eax # encoding: [0x01,0xd0] +; X86-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x02] +; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] +; X86-NEXT: addl %eax, %esi # encoding: [0x01,0xc6] ; X86-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x04] -; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] -; X86-NEXT: addl %eax, %edx # encoding: [0x01,0xc2] +; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] +; X86-NEXT: addl %esi, %eax # encoding: [0x01,0xf0] ; X86-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x05] ; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] -; X86-NEXT: addl %edx, %esi # encoding: [0x01,0xd6] +; X86-NEXT: addl %eax, %esi # encoding: [0x01,0xc6] ; X86-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x06] ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] ; X86-NEXT: addl %esi, %eax # encoding: [0x01,0xf0] ; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] +; 
X86-NEXT: addl %edx, %eax # encoding: [0x01,0xd0] ; X86-NEXT: popl %esi # encoding: [0x5e] ; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] @@ -2331,23 +2331,23 @@ ; X64: # %bb.0: ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] ; X64-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x75,0xc1] -; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] -; X64-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x01] ; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] -; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] -; X64-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x02] +; X64-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x01] ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] -; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] +; X64-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x02] +; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] +; X64-NEXT: addl %eax, %edx # encoding: [0x01,0xc2] ; X64-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x04] -; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] -; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] +; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] +; X64-NEXT: addl %edx, %eax # encoding: [0x01,0xd0] ; X64-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x05] ; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] -; X64-NEXT: addl %ecx, %edx # encoding: [0x01,0xca] +; X64-NEXT: addl %eax, %edx # encoding: [0x01,0xc2] ; X64-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x06] ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] ; X64-NEXT: addl %edx, %eax # encoding: [0x01,0xd0] ; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8] +; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask) Index: test/CodeGen/X86/fold-tied-op.ll =================================================================== --- test/CodeGen/X86/fold-tied-op.ll +++ test/CodeGen/X86/fold-tied-op.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -mtriple=i386--netbsd < %s | FileCheck %s ; Regression test for http://reviews.llvm.org/D5701 @@ -19,6 +20,111 @@ ; Function Attrs: nounwind uwtable define i64 @fn1() #0 { +; CHECK-LABEL: fn1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: .cfi_offset %ebp, -8 +; CHECK-NEXT: movl %esp, %ebp +; CHECK-NEXT: .cfi_def_cfa_register %ebp +; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: subl $12, %esp +; CHECK-NEXT: .cfi_offset %esi, -20 +; CHECK-NEXT: .cfi_offset %edi, -16 +; CHECK-NEXT: .cfi_offset %ebx, -12 +; CHECK-NEXT: movl $-1028477379, %edi # imm = 0xC2B2AE3D +; CHECK-NEXT: movl $668265295, %ecx # imm = 0x27D4EB4F +; CHECK-NEXT: movl a, %ebx +; CHECK-NEXT: cmpl $0, (%ebx) +; CHECK-NEXT: je .LBB0_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: movl 8(%ebx), %edx +; CHECK-NEXT: movl 12(%ebx), %edi +; CHECK-NEXT: movl %edi, %esi +; CHECK-NEXT: shldl $1, %edx, %esi +; 
CHECK-NEXT: orl %edi, %esi +; CHECK-NEXT: leal (%edx,%edx), %eax +; CHECK-NEXT: orl %edx, %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl 16(%ebx), %edi +; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl 20(%ebx), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl %eax, %edx +; CHECK-NEXT: shldl $2, %edi, %edx +; CHECK-NEXT: movl %eax, %edi +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; CHECK-NEXT: shldl $31, %eax, %edi +; CHECK-NEXT: shll $2, %eax +; CHECK-NEXT: orl %edi, %eax +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: shrl %edi +; CHECK-NEXT: orl %edx, %edi +; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: adcl %esi, %edi +; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl 28(%ebx), %edi +; CHECK-NEXT: movl 24(%ebx), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl $-1028477379, %ebx # imm = 0xC2B2AE3D +; CHECK-NEXT: imull %eax, %ebx +; CHECK-NEXT: mull %ecx +; CHECK-NEXT: movl %eax, %esi +; CHECK-NEXT: imull %edi, %ecx +; CHECK-NEXT: addl %ebx, %ecx +; CHECK-NEXT: addl %edx, %ecx +; CHECK-NEXT: imull $1336530590, %edi, %edi # imm = 0x4FA9D69E +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; CHECK-NEXT: imull $-2056954758, %eax, %edx # imm = 0x85655C7A +; CHECK-NEXT: addl %edx, %edi +; CHECK-NEXT: movl $1336530590, %edx # imm = 0x4FA9D69E +; CHECK-NEXT: mull %edx +; CHECK-NEXT: addl %edx, %edi +; CHECK-NEXT: shrdl $3, %ecx, %esi +; CHECK-NEXT: sarl $3, %ecx +; CHECK-NEXT: orl %edi, %ecx +; CHECK-NEXT: orl %eax, %esi +; CHECK-NEXT: imull $326129324, %esi, %eax # imm = 0x137056AC +; CHECK-NEXT: imull $-66860409, %ecx, %ecx # imm = 0xFC03CA87 +; CHECK-NEXT: addl %eax, %ecx +; CHECK-NEXT: movl $-66860409, %edi # imm = 0xFC03CA87 +; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: mull %edi +; CHECK-NEXT: addl %edx, %ecx +; CHECK-NEXT: xorl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; CHECK-NEXT: xorl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; CHECK-NEXT: movl %ecx, b+4 +; CHECK-NEXT: imull $326129324, %eax, %edx # imm = 0x137056AC +; CHECK-NEXT: imull $-66860409, %ecx, %ecx # imm = 0xFC03CA87 +; CHECK-NEXT: addl %edx, %ecx +; CHECK-NEXT: movl %eax, b +; CHECK-NEXT: mull %edi +; CHECK-NEXT: jmp .LBB0_3 +; CHECK-NEXT: .LBB0_2: # %if.else +; CHECK-NEXT: xorl b+4, %edi +; CHECK-NEXT: xorl b, %ecx +; CHECK-NEXT: movl $1419758215, %edx # imm = 0x549FCA87 +; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: mull %edx +; CHECK-NEXT: imull $93298681, %ecx, %esi # imm = 0x58F9FF9 +; CHECK-NEXT: imull $1419758215, %edi, %ecx # imm = 0x549FCA87 +; CHECK-NEXT: addl %esi, %ecx +; CHECK-NEXT: .LBB0_3: # %if.end +; CHECK-NEXT: addl %edx, %ecx +; CHECK-NEXT: addl $-1028477341, %eax # imm = 0xC2B2AE63 +; CHECK-NEXT: adcl $-2048144777, %ecx # imm = 0x85EBCA77 +; CHECK-NEXT: movl %eax, b +; CHECK-NEXT: movl %ecx, b+4 +; CHECK-NEXT: addl $12, %esp +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: popl %ebx +; CHECK-NEXT: popl %ebp +; CHECK-NEXT: .cfi_def_cfa %esp, 4 +; CHECK-NEXT: retl entry: %0 = load i32, i32* @a, align 4, !tbaa !1 %1 = inttoptr i32 %0 to %struct.XXH_state64_t* Index: test/CodeGen/X86/h-registers-1.ll =================================================================== --- 
test/CodeGen/X86/h-registers-1.ll +++ test/CodeGen/X86/h-registers-1.ll @@ -31,11 +31,11 @@ ; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx ; CHECK-NEXT: addq %r10, %rsi ; CHECK-NEXT: addq %rbp, %rdx -; CHECK-NEXT: addq %rsi, %rdx ; CHECK-NEXT: addq %rdi, %rcx ; CHECK-NEXT: addq %rbx, %rax ; CHECK-NEXT: addq %rcx, %rax ; CHECK-NEXT: addq %rdx, %rax +; CHECK-NEXT: addq %rsi, %rax ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: popq %rbp @@ -65,11 +65,11 @@ ; GNUX32-NEXT: movzbl {{[0-9]+}}(%esp), %ebx ; GNUX32-NEXT: addq %r10, %rsi ; GNUX32-NEXT: addq %rbp, %rdx -; GNUX32-NEXT: addq %rsi, %rdx ; GNUX32-NEXT: addq %rdi, %rcx ; GNUX32-NEXT: addq %rbx, %rax ; GNUX32-NEXT: addq %rcx, %rax ; GNUX32-NEXT: addq %rdx, %rax +; GNUX32-NEXT: addq %rsi, %rax ; GNUX32-NEXT: popq %rbx ; GNUX32-NEXT: .cfi_def_cfa_offset 16 ; GNUX32-NEXT: popq %rbp Index: test/CodeGen/X86/hipe-cc.ll =================================================================== --- test/CodeGen/X86/hipe-cc.ll +++ test/CodeGen/X86/hipe-cc.ll @@ -1,43 +1,64 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -stack-symbol-ordering=0 -tailcallopt -code-model=medium -stack-alignment=4 -mtriple=i686-linux-gnu -mcpu=pentium | FileCheck %s ; Check the HiPE calling convention works (x86-32) define void @zap(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: zap: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: subl $20, %esp +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl $8, %ecx +; CHECK-NEXT: calll addfour +; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $2, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $1, {{[0-9]+}}(%esp) +; CHECK-NEXT: calll foo +; CHECK-NEXT: addl $20, %esp +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: popl %ebx +; CHECK-NEXT: popl %ebp +; CHECK-NEXT: retl entry: - ; CHECK: movl 40(%esp), %eax - ; CHECK-NEXT: movl 44(%esp), %edx - ; CHECK-NEXT: movl $8, %ecx - ; CHECK-NEXT: calll addfour %0 = call cc 11 {i32, i32, i32} @addfour(i32 undef, i32 undef, i32 %a, i32 %b, i32 8) %res = extractvalue {i32, i32, i32} %0, 2 - ; CHECK: movl %eax, 16(%esp) - ; CHECK-NEXT: movl $2, 12(%esp) - ; CHECK-NEXT: movl $1, 8(%esp) - ; CHECK: calll foo tail call void @foo(i32 undef, i32 undef, i32 1, i32 2, i32 %res) nounwind ret void } define cc 11 {i32, i32, i32} @addfour(i32 %hp, i32 %p, i32 %x, i32 %y, i32 %z) nounwind { +; CHECK-LABEL: addfour: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addl %ecx, %edx +; CHECK-NEXT: addl %edx, %eax +; CHECK-NEXT: retl entry: - ; CHECK: addl %edx, %eax - ; CHECK-NEXT: addl %ecx, %eax %0 = add i32 %x, %y %1 = add i32 %0, %z - ; CHECK: ret %res = insertvalue {i32, i32, i32} undef, i32 %1, 2 ret {i32, i32, i32} %res } define cc 11 void @foo(i32 %hp, i32 %p, i32 %arg0, i32 %arg1, i32 %arg2) nounwind { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subl $20, %esp +; CHECK-NEXT: movl %esi, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl %edx, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl %ecx, (%esp) +; CHECK-NEXT: addl $20, %esp +; CHECK-NEXT: jmp bar # TAILCALL entry: - ; CHECK: movl %esi, 16(%esp) - ; CHECK-NEXT: movl %ebp, 12(%esp) - ; CHECK-NEXT: movl %eax, 8(%esp) - ; CHECK-NEXT: movl %edx, 4(%esp) - ; CHECK-NEXT: movl %ecx, (%esp) %hp_var = alloca i32 %p_var = alloca i32 %arg0_var = 
alloca i32 @@ -54,17 +75,18 @@ %2 = load i32, i32* %arg0_var %3 = load i32, i32* %arg1_var %4 = load i32, i32* %arg2_var - ; CHECK: jmp bar tail call cc 11 void @bar(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4) nounwind ret void } define cc 11 void @baz() nounwind { +; CHECK-LABEL: baz: +; CHECK: # %bb.0: +; CHECK-NEXT: movl $42, %eax +; CHECK-NEXT: jmpl *clos # TAILCALL %tmp_clos = load i32, i32* @clos %tmp_clos2 = inttoptr i32 %tmp_clos to i32* %indirect_call = bitcast i32* %tmp_clos2 to void (i32, i32, i32)* - ; CHECK: movl $42, %eax - ; CHECK-NEXT: jmpl *clos tail call cc 11 void %indirect_call(i32 undef, i32 undef, i32 42) nounwind ret void } @@ -72,11 +94,17 @@ ; Sanity-check the tail call sequence. Number of arguments was chosen as to ; expose a bug where the tail call sequence clobbered the stack. define cc 11 { i32, i32, i32 } @tailcaller(i32 %hp, i32 %p) nounwind { - ; CHECK: movl $15, %eax - ; CHECK-NEXT: movl $31, %edx - ; CHECK-NEXT: movl $47, %ecx - ; CHECK-NEXT: popl %edi - ; CHECK-NEXT: jmp tailcallee +; CHECK-LABEL: tailcaller: +; CHECK: # %bb.0: +; CHECK-NEXT: subl $8, %esp +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $63, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $15, %eax +; CHECK-NEXT: movl $31, %edx +; CHECK-NEXT: movl $47, %ecx +; CHECK-NEXT: popl %edi +; CHECK-NEXT: jmp tailcallee # TAILCALL %ret = tail call cc11 { i32, i32, i32 } @tailcallee(i32 %hp, i32 %p, i32 15, i32 31, i32 47, i32 63) nounwind ret { i32, i32, i32 } %ret Index: test/CodeGen/X86/hipe-cc64.ll =================================================================== --- test/CodeGen/X86/hipe-cc64.ll +++ test/CodeGen/X86/hipe-cc64.ll @@ -1,48 +1,71 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -stack-symbol-ordering=0 -tailcallopt -relocation-model=static -code-model=medium -stack-alignment=8 -mtriple=x86_64-linux-gnu -mcpu=opteron | FileCheck %s ; Check the HiPE calling convention works (x86-64) define void @zap(i64 %a, i64 %b) nounwind { +; CHECK-LABEL: zap: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: pushq %r13 +; CHECK-NEXT: pushq %r12 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: movq %rsi, %rdx +; CHECK-NEXT: movl $8, %ecx +; CHECK-NEXT: movl $9, %r8d +; CHECK-NEXT: movq %rdi, %rsi +; CHECK-NEXT: callq addfour +; CHECK-NEXT: movl $1, %edx +; CHECK-NEXT: movl $2, %ecx +; CHECK-NEXT: movl $3, %r8d +; CHECK-NEXT: movq %rax, %r9 +; CHECK-NEXT: callq foo +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r12 +; CHECK-NEXT: popq %r13 +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %r15 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: retq entry: - ; CHECK: movq %rsi, %rdx - ; CHECK-NEXT: movl $8, %ecx - ; CHECK-NEXT: movl $9, %r8d - ; CHECK-NEXT: movq %rdi, %rsi - ; CHECK-NEXT: callq addfour %0 = call cc 11 {i64, i64, i64} @addfour(i64 undef, i64 undef, i64 %a, i64 %b, i64 8, i64 9) %res = extractvalue {i64, i64, i64} %0, 2 - ; CHECK: movl $1, %edx - ; CHECK-NEXT: movl $2, %ecx - ; CHECK-NEXT: movl $3, %r8d - ; CHECK-NEXT: movq %rax, %r9 - ; CHECK: callq foo tail call void @foo(i64 undef, i64 undef, i64 1, i64 2, i64 3, i64 %res) nounwind ret void } define cc 11 {i64, i64, i64} @addfour(i64 %hp, i64 %p, i64 %x, i64 %y, i64 %z, i64 %w) nounwind { +; CHECK-LABEL: addfour: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addq %rcx, %rdx +; CHECK-NEXT: addq %r8, %rdx +; CHECK-NEXT: leaq (%rdx,%rsi), %rax +; CHECK-NEXT: retq entry: - ; CHECK: leaq (%rsi,%rdx), 
%rax - ; CHECK-NEXT: addq %rcx, %rax - ; CHECK-NEXT: addq %r8, %rax %0 = add i64 %x, %y %1 = add i64 %0, %z %2 = add i64 %1, %w - ; CHECK: ret %res = insertvalue {i64, i64, i64} undef, i64 %2, 2 ret {i64, i64, i64} %res } define cc 11 void @foo(i64 %hp, i64 %p, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3) nounwind { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subq $48, %rsp +; CHECK-NEXT: movq %r15, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movq %rbp, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movq %rsi, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movq %r8, (%rsp) +; CHECK-NEXT: addq $48, %rsp +; CHECK-NEXT: jmp bar # TAILCALL entry: - ; CHECK: movq %r15, 40(%rsp) - ; CHECK-NEXT: movq %rbp, 32(%rsp) - ; CHECK-NEXT: movq %rsi, 24(%rsp) - ; CHECK-NEXT: movq %rdx, 16(%rsp) - ; CHECK-NEXT: movq %rcx, 8(%rsp) - ; CHECK-NEXT: movq %r8, (%rsp) %hp_var = alloca i64 %p_var = alloca i64 %arg0_var = alloca i64 @@ -56,24 +79,26 @@ store i64 %arg2, i64* %arg2_var store i64 %arg3, i64* %arg3_var - ; Loads are reading values just writen from corresponding register and are therefore noops. + ; Loads are reading values just written from the corresponding registers and are therefore no-ops. %0 = load i64, i64* %hp_var %1 = load i64, i64* %p_var %2 = load i64, i64* %arg0_var %3 = load i64, i64* %arg1_var %4 = load i64, i64* %arg2_var %5 = load i64, i64* %arg3_var - ; CHECK: jmp bar tail call cc 11 void @bar(i64 %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5) nounwind ret void } define cc 11 void @baz() nounwind { +; CHECK-LABEL: baz: +; CHECK: # %bb.0: +; CHECK-NEXT: movabsq $clos, %rax +; CHECK-NEXT: movl $42, %esi +; CHECK-NEXT: jmpq *(%rax) # TAILCALL %tmp_clos = load i64, i64* @clos %tmp_clos2 = inttoptr i64 %tmp_clos to i64* %indirect_call = bitcast i64* %tmp_clos2 to void (i64, i64, i64)* - ; CHECK: movl $42, %esi - ; CHECK-NEXT: jmpq *(%rax) tail call cc 11 void %indirect_call(i64 undef, i64 undef, i64 42) nounwind ret void } @@ -81,13 +106,20 @@ ; Sanity-check the tail call sequence. Number of arguments was chosen as to ; expose a bug where the tail call sequence clobbered the stack. 
define cc 11 { i64, i64, i64 } @tailcaller(i64 %hp, i64 %p) #0 { - ; CHECK: movl $15, %esi - ; CHECK-NEXT: movl $31, %edx - ; CHECK-NEXT: movl $47, %ecx - ; CHECK-NEXT: movl $63, %r8d - ; CHECK-NEXT: popq %rax - ; CHECK-NEXT: .cfi_def_cfa_offset 16 - ; CHECK-NEXT: jmp tailcallee +; CHECK-LABEL: tailcaller: +; CHECK: # %bb.0: +; CHECK-NEXT: subq $16, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax +; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movq $79, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movl $15, %esi +; CHECK-NEXT: movl $31, %edx +; CHECK-NEXT: movl $47, %ecx +; CHECK-NEXT: movl $63, %r8d +; CHECK-NEXT: popq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: jmp tailcallee # TAILCALL %ret = tail call cc11 { i64, i64, i64 } @tailcallee(i64 %hp, i64 %p, i64 15, i64 31, i64 47, i64 63, i64 79) #1 ret { i64, i64, i64 } %ret Index: test/CodeGen/X86/imul.ll =================================================================== --- test/CodeGen/X86/imul.ll +++ test/CodeGen/X86/imul.ll @@ -453,13 +453,18 @@ ; ; X86-LABEL: test6: ; X86: # %bb.0: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: shll $5, %ecx -; X86-NEXT: addl %eax, %ecx +; X86-NEXT: pushl %esi +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %esi, -8 +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, %esi +; X86-NEXT: shll $5, %esi ; X86-NEXT: movl $33, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) +; X86-NEXT: addl %esi, %edx ; X86-NEXT: addl %ecx, %edx +; X86-NEXT: popl %esi +; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl entry: %tmp3 = mul i64 %a, 33 Index: test/CodeGen/X86/lea-opt-cse4.ll =================================================================== --- test/CodeGen/X86/lea-opt-cse4.ll +++ test/CodeGen/X86/lea-opt-cse4.ll @@ -10,12 +10,12 @@ ; X64-NEXT: movl 16(%rdi), %eax ; X64-NEXT: movl (%rdi), %ecx ; X64-NEXT: addl %eax, %ecx -; X64-NEXT: addl %eax, %ecx -; X64-NEXT: addl %eax, %ecx -; X64-NEXT: leal (%rcx,%rax), %edx -; X64-NEXT: leal 1(%rax,%rcx), %ecx -; X64-NEXT: movl %ecx, 12(%rdi) -; X64-NEXT: leal 1(%rax,%rdx), %eax +; X64-NEXT: leal (%rax,%rax), %edx +; X64-NEXT: addl %ecx, %edx +; X64-NEXT: leal (%rdx,%rax), %ecx +; X64-NEXT: leal 1(%rax,%rdx), %edx +; X64-NEXT: movl %edx, 12(%rdi) +; X64-NEXT: leal 1(%rax,%rcx), %eax ; X64-NEXT: movl %eax, 16(%rdi) ; X64-NEXT: retq ; @@ -28,12 +28,12 @@ ; X86-NEXT: movl 16(%eax), %ecx ; X86-NEXT: movl (%eax), %edx ; X86-NEXT: addl %ecx, %edx -; X86-NEXT: addl %ecx, %edx -; X86-NEXT: addl %ecx, %edx -; X86-NEXT: leal 1(%ecx,%edx), %esi -; X86-NEXT: addl %ecx, %edx -; X86-NEXT: movl %esi, 12(%eax) -; X86-NEXT: leal 1(%ecx,%edx), %ecx +; X86-NEXT: leal (%ecx,%ecx), %esi +; X86-NEXT: addl %edx, %esi +; X86-NEXT: leal 1(%ecx,%esi), %edx +; X86-NEXT: addl %ecx, %esi +; X86-NEXT: movl %edx, 12(%eax) +; X86-NEXT: leal 1(%ecx,%esi), %ecx ; X86-NEXT: movl %ecx, 16(%eax) ; X86-NEXT: popl %esi ; X86-NEXT: .cfi_def_cfa_offset 4 @@ -72,13 +72,13 @@ ; X64-NEXT: # %bb.2: # %exit ; X64-NEXT: addl %eax, %ecx ; X64-NEXT: leal 1(%rax,%rcx), %ecx -; X64-NEXT: addl %eax, %ecx -; X64-NEXT: addl %eax, %ecx -; X64-NEXT: addl %eax, %ecx -; X64-NEXT: addl %eax, %ecx -; X64-NEXT: addl %eax, %ecx -; X64-NEXT: addl %eax, %ecx -; X64-NEXT: movl %ecx, 16(%rdi) +; X64-NEXT: leal (%rax,%rax), %edx +; X64-NEXT: addl %eax, %edx +; X64-NEXT: addl %eax, %edx +; X64-NEXT: addl %eax, %edx +; X64-NEXT: addl %eax, %edx +; X64-NEXT: addl %ecx, %edx +; X64-NEXT: movl %edx, 16(%rdi) ; X64-NEXT: retq ; ; 
X86-LABEL: foo_loop: @@ -103,13 +103,13 @@ ; X86-NEXT: # %bb.2: # %exit ; X86-NEXT: addl %ecx, %esi ; X86-NEXT: leal 1(%ecx,%esi), %edx -; X86-NEXT: addl %ecx, %edx -; X86-NEXT: addl %ecx, %edx -; X86-NEXT: addl %ecx, %edx -; X86-NEXT: addl %ecx, %edx -; X86-NEXT: addl %ecx, %edx -; X86-NEXT: addl %ecx, %edx -; X86-NEXT: movl %edx, 16(%eax) +; X86-NEXT: leal (%ecx,%ecx), %esi +; X86-NEXT: addl %ecx, %esi +; X86-NEXT: addl %ecx, %esi +; X86-NEXT: addl %ecx, %esi +; X86-NEXT: addl %ecx, %esi +; X86-NEXT: addl %edx, %esi +; X86-NEXT: movl %esi, 16(%eax) ; X86-NEXT: popl %esi ; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: popl %edi Index: test/CodeGen/X86/lrshrink.ll =================================================================== --- test/CodeGen/X86/lrshrink.ll +++ test/CodeGen/X86/lrshrink.ll @@ -5,6 +5,56 @@ ; to minimize live-range. define i64 @test(i1 %a, i64 %r1, i64 %r2, i64 %s1, i64 %s2, i64 %t1, i64 %t2) { +; CHECK-LABEL: test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: pushq %r12 +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 40 +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset %rbx, -40 +; CHECK-NEXT: .cfi_offset %r12, -32 +; CHECK-NEXT: .cfi_offset %r14, -24 +; CHECK-NEXT: .cfi_offset %r15, -16 +; CHECK-NEXT: movq %rcx, %r12 +; CHECK-NEXT: movq %rsi, %r14 +; CHECK-NEXT: movl $4, %r15d +; CHECK-NEXT: testb $1, %dil +; CHECK-NEXT: je .LBB0_2 +; CHECK-NEXT: # %bb.1: # %then +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r9 +; CHECK-NEXT: movl $10, %r15d +; CHECK-NEXT: movq %rdx, %r14 +; CHECK-NEXT: movq %r8, %r12 +; CHECK-NEXT: .LBB0_2: # %else +; CHECK-NEXT: addq %r9, %r12 +; CHECK-NEXT: callq _Z3foov +; CHECK-NEXT: movl %eax, %ebx +; CHECK-NEXT: addq %r14, %rbx +; CHECK-NEXT: callq _Z3foov +; CHECK-NEXT: movl %eax, %r14d +; CHECK-NEXT: addq %rbx, %r14 +; CHECK-NEXT: callq _Z3foov +; CHECK-NEXT: movl %eax, %eax +; CHECK-NEXT: addq %r14, %rax +; CHECK-NEXT: addq %r12, %rax +; CHECK-NEXT: addq %r15, %rax +; CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 40 +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: popq %r12 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: popq %r15 +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq entry: br i1 %a, label %then, label %else @@ -16,15 +66,6 @@ %r = phi i64 [ %r1, %entry ], [ %r2, %then ] %s = phi i64 [ %s1, %entry ], [ %s2, %then ] %t = phi i64 [ %t1, %entry ], [ %t2, %then ] -; CHECK-LABEL: test: -; CHECK: add -; CHECK: add -; CHECK: call -; CHECK: add -; CHECK: call -; CHECK: add -; CHECK: call -; CHECK: add %1 = tail call i32 @_Z3foov() %2 = zext i32 %1 to i64 %3 = tail call i32 @_Z3foov() Index: test/CodeGen/X86/machine-combiner-int.ll =================================================================== --- test/CodeGen/X86/machine-combiner-int.ll +++ test/CodeGen/X86/machine-combiner-int.ll @@ -1,21 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -machine-combiner-verify-pattern-order=true | FileCheck %s ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -stop-after machine-combiner -machine-combiner-verify-pattern-order=true -o - | FileCheck %s --check-prefix=DEAD -; Verify that integer multiplies are 
reassociated. The first multiply in +; Verify that integer multiplies are reassociated. The first multiply in ; each test should be independent of the result of the preceding add (lea). -; TODO: This test does not actually test i16 machine instruction reassociation +; TODO: This test does not actually test i16 machine instruction reassociation ; because the operands are being promoted to i32 types. define i16 @reassociate_muls_i16(i16 %x0, i16 %x1, i16 %x2, i16 %x3) { ; CHECK-LABEL: reassociate_muls_i16: ; CHECK: # %bb.0: -; CHECK-NEXT: # kill -; CHECK-NEXT: # kill -; CHECK-NEXT: leal (%rdi,%rsi), %eax -; CHECK-NEXT: imull %ecx, %edx -; CHECK-NEXT: imull %edx, %eax -; CHECK-NEXT: # kill +; CHECK-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: leal (%rdi,%rsi), %eax +; CHECK-NEXT: imull %ecx, %edx +; CHECK-NEXT: imull %edx, %eax +; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %t0 = add i16 %x0, %x1 %t1 = mul i16 %x2, %t0 @@ -26,16 +27,13 @@ define i32 @reassociate_muls_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) { ; CHECK-LABEL: reassociate_muls_i32: ; CHECK: # %bb.0: -; CHECK-NEXT: # kill -; CHECK-NEXT: # kill -; CHECK-NEXT: leal (%rdi,%rsi), %eax -; CHECK-NEXT: imull %ecx, %edx -; CHECK-NEXT: imull %edx, %eax +; CHECK-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: leal (%rdi,%rsi), %eax +; CHECK-NEXT: imull %ecx, %edx +; CHECK-NEXT: imull %edx, %eax ; CHECK-NEXT: retq -; DEAD: ADD32rr -; DEAD-NEXT: IMUL32rr{{.*}}implicit-def dead $eflags -; DEAD-NEXT: IMUL32rr{{.*}}implicit-def dead $eflags %t0 = add i32 %x0, %x1 %t1 = mul i32 %x2, %t0 @@ -46,9 +44,9 @@ define i64 @reassociate_muls_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3) { ; CHECK-LABEL: reassociate_muls_i64: ; CHECK: # %bb.0: -; CHECK-NEXT: leaq (%rdi,%rsi), %rax -; CHECK-NEXT: imulq %rcx, %rdx -; CHECK-NEXT: imulq %rdx, %rax +; CHECK-NEXT: leaq (%rdi,%rsi), %rax +; CHECK-NEXT: imulq %rcx, %rdx +; CHECK-NEXT: imulq %rdx, %rax ; CHECK-NEXT: retq %t0 = add i64 %x0, %x1 %t1 = mul i64 %x2, %t0 @@ -56,17 +54,17 @@ ret i64 %t2 } -; Verify that integer 'ands' are reassociated. The first 'and' in +; Verify that integer 'ands' are reassociated. The first 'and' in ; each test should be independent of the result of the preceding sub. 
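The property all of these tests encode: once an opcode is listed in X86InstrInfo::isAssociativeAndCommutative, the machine combiner may rewrite the serial chain op(x3, op(x2, sub(x0, x1))) into op(op(x2, x3), sub(x0, x1)), so the first op no longer waits on the sub. A minimal sketch of the rewritten form, as a hypothetical function rather than one of the tests in this file:

define i32 @reassoc_sketch(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
  %t0 = sub i32 %x0, %x1
  ; %u depends only on %x2 and %x3, so it can issue in parallel with the sub
  %u = and i32 %x2, %x3
  %t2 = and i32 %t0, %u
  ret i32 %t2
}

For the ADD opcodes enabled by this patch, the final combine conveniently folds into an LEA, as the new reassociate_adds_* tests added at the end of this file show.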
define i8 @reassociate_ands_i8(i8 %x0, i8 %x1, i8 %x2, i8 %x3) { ; CHECK-LABEL: reassociate_ands_i8: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edx, %eax -; CHECK-NEXT: subb %sil, %dil -; CHECK-NEXT: andb %cl, %al -; CHECK-NEXT: andb %dil, %al -; CHECK-NEXT: # kill +; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: subb %sil, %dil +; CHECK-NEXT: andb %cl, %al +; CHECK-NEXT: andb %dil, %al +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %t0 = sub i8 %x0, %x1 %t1 = and i8 %x2, %t0 @@ -79,10 +77,10 @@ define i32 @reassociate_ands_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) { ; CHECK-LABEL: reassociate_ands_i32: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edx, %eax -; CHECK-NEXT: subl %esi, %edi -; CHECK-NEXT: andl %ecx, %eax -; CHECK-NEXT: andl %edi, %eax +; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: subl %esi, %edi +; CHECK-NEXT: andl %ecx, %eax +; CHECK-NEXT: andl %edi, %eax ; CHECK-NEXT: retq %t0 = sub i32 %x0, %x1 %t1 = and i32 %x2, %t0 @@ -93,10 +91,10 @@ define i64 @reassociate_ands_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3) { ; CHECK-LABEL: reassociate_ands_i64: ; CHECK: # %bb.0: -; CHECK-NEXT: movq %rdx, %rax -; CHECK-NEXT: subq %rsi, %rdi -; CHECK-NEXT: andq %rcx, %rax -; CHECK-NEXT: andq %rdi, %rax +; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: subq %rsi, %rdi +; CHECK-NEXT: andq %rcx, %rax +; CHECK-NEXT: andq %rdi, %rax ; CHECK-NEXT: retq %t0 = sub i64 %x0, %x1 %t1 = and i64 %x2, %t0 @@ -104,17 +102,17 @@ ret i64 %t2 } -; Verify that integer 'ors' are reassociated. The first 'or' in +; Verify that integer 'ors' are reassociated. The first 'or' in ; each test should be independent of the result of the preceding sub. define i8 @reassociate_ors_i8(i8 %x0, i8 %x1, i8 %x2, i8 %x3) { ; CHECK-LABEL: reassociate_ors_i8: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edx, %eax -; CHECK-NEXT: subb %sil, %dil -; CHECK-NEXT: orb %cl, %al -; CHECK-NEXT: orb %dil, %al -; CHECK-NEXT: # kill +; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: subb %sil, %dil +; CHECK-NEXT: orb %cl, %al +; CHECK-NEXT: orb %dil, %al +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %t0 = sub i8 %x0, %x1 %t1 = or i8 %x2, %t0 @@ -127,10 +125,10 @@ define i32 @reassociate_ors_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) { ; CHECK-LABEL: reassociate_ors_i32: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edx, %eax -; CHECK-NEXT: subl %esi, %edi -; CHECK-NEXT: orl %ecx, %eax -; CHECK-NEXT: orl %edi, %eax +; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: subl %esi, %edi +; CHECK-NEXT: orl %ecx, %eax +; CHECK-NEXT: orl %edi, %eax ; CHECK-NEXT: retq %t0 = sub i32 %x0, %x1 %t1 = or i32 %x2, %t0 @@ -141,10 +139,10 @@ define i64 @reassociate_ors_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3) { ; CHECK-LABEL: reassociate_ors_i64: ; CHECK: # %bb.0: -; CHECK-NEXT: movq %rdx, %rax -; CHECK-NEXT: subq %rsi, %rdi -; CHECK-NEXT: orq %rcx, %rax -; CHECK-NEXT: orq %rdi, %rax +; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: subq %rsi, %rdi +; CHECK-NEXT: orq %rcx, %rax +; CHECK-NEXT: orq %rdi, %rax ; CHECK-NEXT: retq %t0 = sub i64 %x0, %x1 %t1 = or i64 %x2, %t0 @@ -158,11 +156,11 @@ define i8 @reassociate_xors_i8(i8 %x0, i8 %x1, i8 %x2, i8 %x3) { ; CHECK-LABEL: reassociate_xors_i8: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edx, %eax -; CHECK-NEXT: subb %sil, %dil -; CHECK-NEXT: xorb %cl, %al -; CHECK-NEXT: xorb %dil, %al -; CHECK-NEXT: # kill +; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: subb %sil, %dil +; CHECK-NEXT: xorb %cl, %al +; CHECK-NEXT: xorb %dil, %al +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %t0 = sub i8 %x0, %x1 
%t1 = xor i8 %x2, %t0 @@ -175,10 +173,10 @@ define i32 @reassociate_xors_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) { ; CHECK-LABEL: reassociate_xors_i32: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edx, %eax -; CHECK-NEXT: subl %esi, %edi -; CHECK-NEXT: xorl %ecx, %eax -; CHECK-NEXT: xorl %edi, %eax +; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: subl %esi, %edi +; CHECK-NEXT: xorl %ecx, %eax +; CHECK-NEXT: xorl %edi, %eax ; CHECK-NEXT: retq %t0 = sub i32 %x0, %x1 %t1 = xor i32 %x2, %t0 @@ -189,10 +187,10 @@ define i64 @reassociate_xors_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3) { ; CHECK-LABEL: reassociate_xors_i64: ; CHECK: # %bb.0: -; CHECK-NEXT: movq %rdx, %rax -; CHECK-NEXT: subq %rsi, %rdi -; CHECK-NEXT: xorq %rcx, %rax -; CHECK-NEXT: xorq %rdi, %rax +; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: subq %rsi, %rdi +; CHECK-NEXT: xorq %rcx, %rax +; CHECK-NEXT: xorq %rdi, %rax ; CHECK-NEXT: retq %t0 = sub i64 %x0, %x1 %t1 = xor i64 %x2, %t0 @@ -200,3 +198,31 @@ ret i64 %t2 } +define i32 @reassociate_adds_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) { +; CHECK-LABEL: reassociate_adds_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: # kill: def $edx killed $edx def $rdx +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: subl %esi, %edi +; CHECK-NEXT: addl %ecx, %edx +; CHECK-NEXT: leal (%rdx,%rdi), %eax +; CHECK-NEXT: retq + %t0 = sub i32 %x0, %x1 + %t1 = add i32 %x2, %t0 + %t2 = add i32 %x3, %t1 + ret i32 %t2 +} + +define i64 @reassociate_adds_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3) { +; CHECK-LABEL: reassociate_adds_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: subq %rsi, %rdi +; CHECK-NEXT: addq %rcx, %rdx +; CHECK-NEXT: leaq (%rdx,%rdi), %rax +; CHECK-NEXT: retq + %t0 = sub i64 %x0, %x1 + %t1 = add i64 %x2, %t0 + %t2 = add i64 %x3, %t1 + ret i64 %t2 +} + Index: test/CodeGen/X86/midpoint-int.ll =================================================================== --- test/CodeGen/X86/midpoint-int.ll +++ test/CodeGen/X86/midpoint-int.ll @@ -319,48 +319,46 @@ ; X32-NEXT: pushl %ebx ; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi -; X32-NEXT: pushl %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X32-NEXT: cmpl %esi, %edx +; X32-NEXT: cmpl %ecx, %edx ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: sbbl %ecx, %eax +; X32-NEXT: sbbl %esi, %eax ; X32-NEXT: movl %edx, %eax +; X32-NEXT: movl $-1, %edx ; X32-NEXT: movl $-1, %edi -; X32-NEXT: movl $-1, %ebx +; X32-NEXT: movl %eax, %ebx ; X32-NEXT: jl .LBB5_2 ; X32-NEXT: # %bb.1: -; X32-NEXT: xorl %ebx, %ebx -; X32-NEXT: movl $1, %edi -; X32-NEXT: movl %ecx, %ebp -; X32-NEXT: movl %esi, %edx +; X32-NEXT: xorl %edi, %edi +; X32-NEXT: movl $1, %edx +; X32-NEXT: movl %esi, %ebp +; X32-NEXT: movl %ecx, %ebx ; X32-NEXT: .LBB5_2: -; X32-NEXT: movl %edi, (%esp) # 4-byte Spill -; X32-NEXT: cmpl %eax, %esi -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi -; X32-NEXT: sbbl %edi, %eax +; X32-NEXT: cmpl %eax, %ecx ; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: sbbl %ecx, %eax +; X32-NEXT: movl %esi, %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: jge .LBB5_4 ; X32-NEXT: # %bb.3: -; X32-NEXT: movl %edi, %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: .LBB5_4: -; X32-NEXT: subl %edx, %eax -; X32-NEXT: sbbl %ebp, %ecx -; X32-NEXT: shrdl $1, %ecx, %eax -; X32-NEXT: imull %eax, %ebx -; X32-NEXT: movl (%esp), %esi # 
4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: addl %ebx, %edx -; X32-NEXT: shrl %ecx -; X32-NEXT: imull %esi, %ecx -; X32-NEXT: addl %ecx, %edx +; X32-NEXT: subl %ebx, %eax +; X32-NEXT: sbbl %ebp, %esi +; X32-NEXT: shrdl $1, %esi, %eax +; X32-NEXT: shrl %esi +; X32-NEXT: imull %eax, %edi +; X32-NEXT: imull %edx, %esi +; X32-NEXT: addl %edi, %esi +; X32-NEXT: mull %edx +; X32-NEXT: addl %esi, %edx ; X32-NEXT: addl {{[0-9]+}}(%esp), %eax -; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx -; X32-NEXT: addl $4, %esp +; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: popl %esi ; X32-NEXT: popl %edi ; X32-NEXT: popl %ebx @@ -399,38 +397,38 @@ ; X32-NEXT: pushl %ebx ; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi -; X32-NEXT: cmpl %ecx, %eax -; X32-NEXT: movl %edi, %edx -; X32-NEXT: sbbl {{[0-9]+}}(%esp), %edx -; X32-NEXT: movl $-1, %ebx +; X32-NEXT: cmpl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl %edi, %ecx +; X32-NEXT: sbbl %esi, %ecx +; X32-NEXT: movl $-1, %edx ; X32-NEXT: jb .LBB6_1 ; X32-NEXT: # %bb.2: -; X32-NEXT: xorl %ebp, %ebp -; X32-NEXT: movl $1, %ebx -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: movl %ecx, %esi +; X32-NEXT: xorl %ebx, %ebx +; X32-NEXT: movl $1, %edx +; X32-NEXT: movl %esi, %ebp +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: jmp .LBB6_3 ; X32-NEXT: .LBB6_1: -; X32-NEXT: movl $-1, %ebp -; X32-NEXT: movl %edi, %edx -; X32-NEXT: movl %eax, %esi -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi -; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl $-1, %ebx +; X32-NEXT: movl %edi, %ebp +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: movl %esi, %edi +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: .LBB6_3: -; X32-NEXT: subl %esi, %eax -; X32-NEXT: sbbl %edx, %edi +; X32-NEXT: subl %ecx, %eax +; X32-NEXT: sbbl %ebp, %edi ; X32-NEXT: shrdl $1, %edi, %eax -; X32-NEXT: imull %eax, %ebp -; X32-NEXT: mull %ebx -; X32-NEXT: addl %ebp, %edx ; X32-NEXT: shrl %edi -; X32-NEXT: imull %ebx, %edi +; X32-NEXT: imull %eax, %ebx +; X32-NEXT: imull %edx, %edi +; X32-NEXT: addl %ebx, %edi +; X32-NEXT: mull %edx ; X32-NEXT: addl %edi, %edx -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; X32-NEXT: addl {{[0-9]+}}(%esp), %eax +; X32-NEXT: adcl %esi, %edx ; X32-NEXT: popl %esi ; X32-NEXT: popl %edi ; X32-NEXT: popl %ebx @@ -473,48 +471,48 @@ ; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi ; X32-NEXT: pushl %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl (%eax), %esi -; X32-NEXT: movl 4(%eax), %ebp -; X32-NEXT: cmpl %esi, %ecx -; X32-NEXT: movl %edx, %eax -; X32-NEXT: sbbl %ebp, %eax +; X32-NEXT: movl (%eax), %ecx +; X32-NEXT: movl 4(%eax), %ebx +; X32-NEXT: cmpl %ecx, %esi +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: sbbl %ebx, %eax ; X32-NEXT: movl $-1, %eax -; X32-NEXT: movl $-1, %ebx -; X32-NEXT: movl %ecx, %edi +; X32-NEXT: movl $-1, %edi +; X32-NEXT: movl %esi, %edx ; X32-NEXT: jl .LBB7_2 ; X32-NEXT: # %bb.1: -; X32-NEXT: xorl %ebx, %ebx +; X32-NEXT: xorl %edi, %edi ; X32-NEXT: movl $1, %eax -; X32-NEXT: movl %ebp, %edx -; X32-NEXT: movl %esi, %edi +; X32-NEXT: movl %ebx, %ebp +; X32-NEXT: movl %ecx, %edx ; X32-NEXT: .LBB7_2: ; X32-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-NEXT: cmpl %ecx, %esi -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl 
{{[0-9]+}}(%esp), %ecx -; X32-NEXT: sbbl %ecx, %eax -; X32-NEXT: movl %ebp, %ecx -; X32-NEXT: movl %esi, %eax +; X32-NEXT: cmpl %esi, %ecx +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NEXT: sbbl %esi, %eax +; X32-NEXT: movl %ebx, %esi +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: jge .LBB7_4 ; X32-NEXT: # %bb.3: -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: .LBB7_4: -; X32-NEXT: subl %edi, %eax -; X32-NEXT: sbbl %edx, %ecx -; X32-NEXT: shrdl $1, %ecx, %eax -; X32-NEXT: imull %eax, %ebx -; X32-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: addl %ebx, %edx -; X32-NEXT: shrl %ecx -; X32-NEXT: imull %edi, %ecx -; X32-NEXT: addl %ecx, %edx -; X32-NEXT: addl %esi, %eax -; X32-NEXT: adcl %ebp, %edx +; X32-NEXT: subl %edx, %eax +; X32-NEXT: sbbl %ebp, %esi +; X32-NEXT: shrdl $1, %esi, %eax +; X32-NEXT: shrl %esi +; X32-NEXT: imull %eax, %edi +; X32-NEXT: movl (%esp), %edx # 4-byte Reload +; X32-NEXT: imull %edx, %esi +; X32-NEXT: addl %edi, %esi +; X32-NEXT: mull %edx +; X32-NEXT: addl %esi, %edx +; X32-NEXT: addl %ecx, %eax +; X32-NEXT: adcl %ebx, %edx ; X32-NEXT: addl $4, %esp ; X32-NEXT: popl %esi ; X32-NEXT: popl %edi @@ -557,45 +555,46 @@ ; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi ; X32-NEXT: subl $8, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl (%eax), %edx +; X32-NEXT: movl (%eax), %ebx ; X32-NEXT: movl 4(%eax), %ebp -; X32-NEXT: cmpl %esi, %edx +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: cmpl %edx, %ebx ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: sbbl %ecx, %eax +; X32-NEXT: sbbl %esi, %eax ; X32-NEXT: movl $-1, %eax -; X32-NEXT: movl $-1, %ebx +; X32-NEXT: movl $-1, %edi ; X32-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %ebx, %edx ; X32-NEXT: jl .LBB8_2 ; X32-NEXT: # %bb.1: -; X32-NEXT: xorl %ebx, %ebx +; X32-NEXT: xorl %edi, %edi ; X32-NEXT: movl $1, %eax -; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-NEXT: movl %esi, %edi +; X32-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-NEXT: movl %ecx, %edx ; X32-NEXT: .LBB8_2: ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: cmpl %edx, %esi -; X32-NEXT: movl %ecx, %eax +; X32-NEXT: cmpl %ebx, %ecx +; X32-NEXT: movl %esi, %eax ; X32-NEXT: sbbl %ebp, %eax +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: jge .LBB8_4 ; X32-NEXT: # %bb.3: -; X32-NEXT: movl %ebp, %ecx -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %ebp, %esi +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: .LBB8_4: -; X32-NEXT: subl %edi, %esi -; X32-NEXT: sbbl (%esp), %ecx # 4-byte Folded Reload -; X32-NEXT: shrdl $1, %ecx, %esi -; X32-NEXT: imull %esi, %ebx -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: addl %ebx, %edx -; X32-NEXT: shrl %ecx -; X32-NEXT: imull %esi, %ecx -; X32-NEXT: addl %ecx, %edx +; X32-NEXT: subl %edx, %eax +; X32-NEXT: sbbl (%esp), %esi # 4-byte Folded Reload +; X32-NEXT: shrdl $1, %esi, %eax +; X32-NEXT: shrl %esi +; X32-NEXT: imull %eax, %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: imull %ecx, %esi +; X32-NEXT: addl %edi, %esi +; X32-NEXT: mull %ecx +; X32-NEXT: addl %esi, %edx ; X32-NEXT: addl {{[0-9]+}}(%esp), %eax ; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx ; X32-NEXT: addl $8, %esp @@ -642,48 +641,48 @@ ; 
X32-NEXT: pushl %esi ; X32-NEXT: subl $12, %esp ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl (%ecx), %esi -; X32-NEXT: movl 4(%ecx), %edi -; X32-NEXT: movl (%eax), %edx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movl (%edx), %ecx +; X32-NEXT: movl 4(%edx), %edi +; X32-NEXT: movl (%eax), %ebx ; X32-NEXT: movl 4(%eax), %ebp -; X32-NEXT: cmpl %esi, %edx +; X32-NEXT: cmpl %ecx, %ebx ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: sbbl %edi, %eax ; X32-NEXT: movl $-1, %eax -; X32-NEXT: movl $-1, %ebx -; X32-NEXT: movl %ebp, %ecx -; X32-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-NEXT: movl $-1, %edx +; X32-NEXT: movl %ebp, %esi +; X32-NEXT: movl %ebx, (%esp) # 4-byte Spill ; X32-NEXT: jl .LBB9_2 ; X32-NEXT: # %bb.1: -; X32-NEXT: xorl %ebx, %ebx +; X32-NEXT: xorl %edx, %edx ; X32-NEXT: movl $1, %eax -; X32-NEXT: movl %edi, %ecx -; X32-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-NEXT: movl %edi, %esi +; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill ; X32-NEXT: .LBB9_2: -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: cmpl %edx, %esi +; X32-NEXT: cmpl %ebx, %ecx ; X32-NEXT: movl %edi, %eax ; X32-NEXT: sbbl %ebp, %eax -; X32-NEXT: movl %edi, %ecx -; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl %edi, %esi +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: jge .LBB9_4 ; X32-NEXT: # %bb.3: -; X32-NEXT: movl %ebp, %ecx -; X32-NEXT: movl %edx, %eax +; X32-NEXT: movl %ebp, %esi +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: .LBB9_4: ; X32-NEXT: subl (%esp), %eax # 4-byte Folded Reload -; X32-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: shrdl $1, %ecx, %eax -; X32-NEXT: imull %eax, %ebx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: mull %ebp -; X32-NEXT: addl %ebx, %edx -; X32-NEXT: shrl %ecx -; X32-NEXT: imull %ebp, %ecx -; X32-NEXT: addl %ecx, %edx -; X32-NEXT: addl %esi, %eax +; X32-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: shrdl $1, %esi, %eax +; X32-NEXT: shrl %esi +; X32-NEXT: imull %eax, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: imull %ebx, %esi +; X32-NEXT: addl %edx, %esi +; X32-NEXT: mull %ebx +; X32-NEXT: addl %esi, %edx +; X32-NEXT: addl %ecx, %eax ; X32-NEXT: adcl %edi, %edx ; X32-NEXT: addl $12, %esp ; X32-NEXT: popl %esi Index: test/CodeGen/X86/misched-balance.ll =================================================================== --- test/CodeGen/X86/misched-balance.ll +++ test/CodeGen/X86/misched-balance.ll @@ -1,8 +1,86 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core2 -pre-RA-sched=source -enable-misched -verify-machineinstrs | FileCheck %s ; ; Verify that misched resource/latency balancy heuristics are sane. 
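As the NOTE lines say, these check bodies are autogenerated; after a scheduler or combiner change the expected workflow is to rerun the update script over the test file rather than hand-edit the checks. A sketch, assuming an LLVM source checkout with a built llc on PATH:

utils/update_llc_test_checks.py test/CodeGen/X86/misched-balance.ll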
define void @unrolled_mmult1(i32* %tmp55, i32* %tmp56, i32* %pre, i32* %pre94, +; CHECK-LABEL: unrolled_mmult1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: pushq %r13 +; CHECK-NEXT: .cfi_def_cfa_offset 40 +; CHECK-NEXT: pushq %r12 +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 56 +; CHECK-NEXT: .cfi_offset %rbx, -56 +; CHECK-NEXT: .cfi_offset %r12, -48 +; CHECK-NEXT: .cfi_offset %r13, -40 +; CHECK-NEXT: .cfi_offset %r14, -32 +; CHECK-NEXT: .cfi_offset %r15, -24 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r14 +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r15 +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r12 +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r13 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_1: # %for.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl (%rdx,%rax), %ebp +; CHECK-NEXT: imull (%rsi), %ebp +; CHECK-NEXT: movl (%rcx,%rax), %ebx +; CHECK-NEXT: imull 4(%rsi), %ebx +; CHECK-NEXT: addl %ebp, %ebx +; CHECK-NEXT: movl (%r8,%rax), %ebp +; CHECK-NEXT: imull 8(%rsi), %ebp +; CHECK-NEXT: movl (%r9,%rax), %r10d +; CHECK-NEXT: imull 12(%rsi), %r10d +; CHECK-NEXT: addl %ebp, %r10d +; CHECK-NEXT: addl %ebx, %r10d +; CHECK-NEXT: movl (%r13,%rax), %ebx +; CHECK-NEXT: imull 16(%rsi), %ebx +; CHECK-NEXT: movl (%r12,%rax), %ebp +; CHECK-NEXT: imull 20(%rsi), %ebp +; CHECK-NEXT: addl %ebx, %ebp +; CHECK-NEXT: movl (%r15,%rax), %r11d +; CHECK-NEXT: imull 24(%rsi), %r11d +; CHECK-NEXT: addl %ebp, %r11d +; CHECK-NEXT: addl %r10d, %r11d +; CHECK-NEXT: movl (%r14,%rax), %r10d +; CHECK-NEXT: imull 28(%rsi), %r10d +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rbx +; CHECK-NEXT: movl (%rbx,%rax), %ebp +; CHECK-NEXT: imull 32(%rsi), %ebp +; CHECK-NEXT: addl %r10d, %ebp +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rbx +; CHECK-NEXT: movl (%rbx,%rax), %ebx +; CHECK-NEXT: imull 36(%rsi), %ebx +; CHECK-NEXT: addl %ebp, %ebx +; CHECK-NEXT: addl %r11d, %ebx +; CHECK-NEXT: movl %ebx, (%rdi,%rax) +; CHECK-NEXT: addq $4, %rax +; CHECK-NEXT: cmpl $40, %eax +; CHECK-NEXT: jne .LBB0_1 +; CHECK-NEXT: # %bb.2: # %end +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: popq %r12 +; CHECK-NEXT: .cfi_def_cfa_offset 40 +; CHECK-NEXT: popq %r13 +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: popq %r15 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq i32* %pre95, i32* %pre96, i32* %pre97, i32* %pre98, i32* %pre99, i32* %pre100, i32* %pre101, i32* %pre102, i32* %pre103, i32* %pre104) nounwind uwtable ssp { @@ -11,41 +89,8 @@ ; imull folded loads should be in order and interleaved with addl, never ; adjacent. Also check that we have no spilling. -; ; Since mmult1 IR is already in good order, this effectively ensure ; the scheduler maintains source order. 
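To make "in order and interleaved" concrete: in the emitted loop each imull should have its load folded (the imull 4(%rsi) style above), an addl of the running sum should fall between consecutive imulls, and %rsp should never appear in the body (no spills). A reduced sketch of the per-element source pattern the loop repeats, as a hypothetical @dot2 that is not part of the test:

define i32 @dot2(i32* %a, i32* %b) {
  %a0 = load i32, i32* %a
  %b0 = load i32, i32* %b
  %m0 = mul i32 %a0, %b0      ; load + multiply for element 0
  %pa1 = getelementptr i32, i32* %a, i64 1
  %pb1 = getelementptr i32, i32* %b, i64 1
  %a1 = load i32, i32* %pa1
  %b1 = load i32, i32* %pb1
  %m1 = mul i32 %a1, %b1      ; load + multiply for element 1
  %s = add i32 %m0, %m1       ; accumulation separates the multiplies
  ret i32 %s
}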
-; -; CHECK-LABEL: %for.body -; CHECK-NOT: %rsp -; CHECK: imull 4 -; CHECK-NOT: {{imull|rsp}} -; CHECK: addl -; CHECK: imull 8 -; CHECK-NOT: {{imull|rsp}} -; CHECK: addl -; CHECK: imull 12 -; CHECK-NOT: {{imull|rsp}} -; CHECK: addl -; CHECK: imull 16 -; CHECK-NOT: {{imull|rsp}} -; CHECK: addl -; CHECK: imull 20 -; CHECK-NOT: {{imull|rsp}} -; CHECK: addl -; CHECK: imull 24 -; CHECK-NOT: {{imull|rsp}} -; CHECK: addl -; CHECK: imull 28 -; CHECK-NOT: {{imull|rsp}} -; CHECK: addl -; CHECK: imull 32 -; CHECK-NOT: {{imull|rsp}} -; CHECK: addl -; CHECK: imull 36 -; CHECK-NOT: {{imull|rsp}} -; CHECK: addl -; CHECK-NOT: {{imull|rsp}} -; CHECK-LABEL: %end for.body: %indvars.iv42.i = phi i64 [ %indvars.iv.next43.i, %for.body ], [ 0, %entry ] %tmp57 = load i32, i32* %tmp56, align 4 @@ -152,6 +197,83 @@ ; CHECK-NOT: {{imull|rsp}} ; CHECK-LABEL: %end define void @unrolled_mmult2(i32* %tmp55, i32* %tmp56, i32* %pre, i32* %pre94, +; CHECK-LABEL: unrolled_mmult2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: pushq %r13 +; CHECK-NEXT: .cfi_def_cfa_offset 40 +; CHECK-NEXT: pushq %r12 +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 56 +; CHECK-NEXT: .cfi_offset %rbx, -56 +; CHECK-NEXT: .cfi_offset %r12, -48 +; CHECK-NEXT: .cfi_offset %r13, -40 +; CHECK-NEXT: .cfi_offset %r14, -32 +; CHECK-NEXT: .cfi_offset %r15, -24 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r14 +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r15 +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r12 +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r13 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB1_1: # %for.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl (%rdx,%rax), %ebp +; CHECK-NEXT: movl (%rcx,%rax), %ebx +; CHECK-NEXT: imull (%rsi), %ebp +; CHECK-NEXT: imull 4(%rsi), %ebx +; CHECK-NEXT: movl (%r8,%rax), %r10d +; CHECK-NEXT: addl %ebp, %ebx +; CHECK-NEXT: imull 8(%rsi), %r10d +; CHECK-NEXT: movl (%r9,%rax), %ebp +; CHECK-NEXT: imull 12(%rsi), %ebp +; CHECK-NEXT: addl %r10d, %ebp +; CHECK-NEXT: movl (%r13,%rax), %r10d +; CHECK-NEXT: addl %ebx, %ebp +; CHECK-NEXT: imull 16(%rsi), %r10d +; CHECK-NEXT: movl (%r12,%rax), %ebx +; CHECK-NEXT: imull 20(%rsi), %ebx +; CHECK-NEXT: addl %r10d, %ebx +; CHECK-NEXT: movl (%r15,%rax), %r10d +; CHECK-NEXT: imull 24(%rsi), %r10d +; CHECK-NEXT: addl %ebx, %r10d +; CHECK-NEXT: movl (%r14,%rax), %ebx +; CHECK-NEXT: addl %ebp, %r10d +; CHECK-NEXT: imull 28(%rsi), %ebx +; CHECK-NEXT: movl (%r11,%rax), %ebp +; CHECK-NEXT: imull 32(%rsi), %ebp +; CHECK-NEXT: addl %ebx, %ebp +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rbx +; CHECK-NEXT: movl (%rbx,%rax), %ebx +; CHECK-NEXT: imull 36(%rsi), %ebx +; CHECK-NEXT: addl %ebp, %ebx +; CHECK-NEXT: addl %r10d, %ebx +; CHECK-NEXT: movl %ebx, (%rdi,%rax) +; CHECK-NEXT: addq $4, %rax +; CHECK-NEXT: cmpl $40, %eax +; CHECK-NEXT: jne .LBB1_1 +; CHECK-NEXT: # %bb.2: # %end +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: popq %r12 +; CHECK-NEXT: .cfi_def_cfa_offset 40 +; CHECK-NEXT: popq %r13 +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: popq %r15 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: 
.cfi_def_cfa_offset 8 +; CHECK-NEXT: retq i32* %pre95, i32* %pre96, i32* %pre97, i32* %pre98, i32* %pre99, i32* %pre100, i32* %pre101, i32* %pre102, i32* %pre103, i32* %pre104) nounwind uwtable ssp { @@ -242,6 +364,37 @@ @c = external global i32, align 4 @d = external global i32, align 4 define i32 @encpc1() nounwind { +; CHECK-LABEL: encpc1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl {{.*}}(%rip), %ecx +; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: shll $8, %eax +; CHECK-NEXT: movl %ecx, %edx +; CHECK-NEXT: shrl $8, %edx +; CHECK-NEXT: orl %edx, %eax +; CHECK-NEXT: movl %eax, {{.*}}(%rip) +; CHECK-NEXT: movl {{.*}}(%rip), %esi +; CHECK-NEXT: movl d(,%rsi,4), %eax +; CHECK-NEXT: movl %eax, {{.*}}(%rip) +; CHECK-NEXT: movl %ecx, {{.*}}(%rip) +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: movl %ecx, {{.*}}(%rip) +; CHECK-NEXT: addl %ecx, %esi +; CHECK-NEXT: movl %esi, {{.*}}(%rip) +; CHECK-NEXT: je .LBB2_1 +; CHECK-NEXT: # %bb.2: # %return +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB2_1: # %if +; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: shrl $16, %eax +; CHECK-NEXT: movl %ecx, %esi +; CHECK-NEXT: shrl $24, %esi +; CHECK-NEXT: shrl $30, %ecx +; CHECK-NEXT: addl %eax, %edx +; CHECK-NEXT: addl %esi, %edx +; CHECK-NEXT: addl %ecx, %edx +; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: retq entry: %l1 = load i32, i32* @a, align 16 %conv = shl i32 %l1, 8 Index: test/CodeGen/X86/misched-matrix.ll =================================================================== --- test/CodeGen/X86/misched-matrix.ll +++ test/CodeGen/X86/misched-matrix.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-- -mcpu=core2 -pre-RA-sched=source -enable-misched \ ; RUN: -misched-topdown -verify-machineinstrs \ ; RUN: | FileCheck %s -check-prefix=TOPDOWN @@ -87,6 +88,245 @@ ; ILPMAX-LABEL: %for.end define void @mmult([4 x i32]* noalias nocapture %m1, [4 x i32]* noalias nocapture %m2, +; TOPDOWN-LABEL: mmult: +; TOPDOWN: # %bb.0: # %entry +; TOPDOWN-NEXT: pushq %rbp +; TOPDOWN-NEXT: .cfi_def_cfa_offset 16 +; TOPDOWN-NEXT: pushq %r15 +; TOPDOWN-NEXT: .cfi_def_cfa_offset 24 +; TOPDOWN-NEXT: pushq %r14 +; TOPDOWN-NEXT: .cfi_def_cfa_offset 32 +; TOPDOWN-NEXT: pushq %rbx +; TOPDOWN-NEXT: .cfi_def_cfa_offset 40 +; TOPDOWN-NEXT: .cfi_offset %rbx, -40 +; TOPDOWN-NEXT: .cfi_offset %r14, -32 +; TOPDOWN-NEXT: .cfi_offset %r15, -24 +; TOPDOWN-NEXT: .cfi_offset %rbp, -16 +; TOPDOWN-NEXT: xorl %eax, %eax +; TOPDOWN-NEXT: .p2align 4, 0x90 +; TOPDOWN-NEXT: .LBB0_1: # %for.body +; TOPDOWN-NEXT: # =>This Inner Loop Header: Depth=1 +; TOPDOWN-NEXT: movl (%rdi,%rax), %r10d +; TOPDOWN-NEXT: movl 4(%rdi,%rax), %r8d +; TOPDOWN-NEXT: movl 8(%rdi,%rax), %r9d +; TOPDOWN-NEXT: movl 12(%rdi,%rax), %r11d +; TOPDOWN-NEXT: movl (%rsi), %ecx +; TOPDOWN-NEXT: imull %r10d, %ecx +; TOPDOWN-NEXT: movl 16(%rsi), %ebx +; TOPDOWN-NEXT: imull %r8d, %ebx +; TOPDOWN-NEXT: addl %ecx, %ebx +; TOPDOWN-NEXT: movl 32(%rsi), %ebp +; TOPDOWN-NEXT: imull %r9d, %ebp +; TOPDOWN-NEXT: movl 48(%rsi), %r14d +; TOPDOWN-NEXT: imull %r11d, %r14d +; TOPDOWN-NEXT: addl %ebp, %r14d +; TOPDOWN-NEXT: addl %ebx, %r14d +; TOPDOWN-NEXT: movl 4(%rsi), %ebx +; TOPDOWN-NEXT: imull %r10d, %ebx +; TOPDOWN-NEXT: movl 20(%rsi), %ebp +; TOPDOWN-NEXT: imull %r8d, %ebp +; TOPDOWN-NEXT: addl %ebx, %ebp +; TOPDOWN-NEXT: movl 36(%rsi), %ebx +; TOPDOWN-NEXT: imull %r9d, %ebx +; TOPDOWN-NEXT: movl 52(%rsi), %ecx +; TOPDOWN-NEXT: imull %r11d, %ecx +; TOPDOWN-NEXT: addl %ebx, %ecx +; TOPDOWN-NEXT: addl %ebp, %ecx +; TOPDOWN-NEXT: 
movl 8(%rsi), %ebx +; TOPDOWN-NEXT: imull %r10d, %ebx +; TOPDOWN-NEXT: movl 24(%rsi), %ebp +; TOPDOWN-NEXT: imull %r8d, %ebp +; TOPDOWN-NEXT: addl %ebx, %ebp +; TOPDOWN-NEXT: movl 40(%rsi), %r15d +; TOPDOWN-NEXT: imull %r9d, %r15d +; TOPDOWN-NEXT: movl 56(%rsi), %ebx +; TOPDOWN-NEXT: imull %r11d, %ebx +; TOPDOWN-NEXT: addl %r15d, %ebx +; TOPDOWN-NEXT: addl %ebp, %ebx +; TOPDOWN-NEXT: imull 12(%rsi), %r10d +; TOPDOWN-NEXT: movl %r14d, (%rdx,%rax) +; TOPDOWN-NEXT: imull 28(%rsi), %r8d +; TOPDOWN-NEXT: addl %r10d, %r8d +; TOPDOWN-NEXT: movl %ecx, 4(%rdx,%rax) +; TOPDOWN-NEXT: imull 44(%rsi), %r9d +; TOPDOWN-NEXT: imull 60(%rsi), %r11d +; TOPDOWN-NEXT: addl %r9d, %r11d +; TOPDOWN-NEXT: addl %r8d, %r11d +; TOPDOWN-NEXT: movl %ebx, 8(%rdx,%rax) +; TOPDOWN-NEXT: movl %r11d, 12(%rdx,%rax) +; TOPDOWN-NEXT: addq $16, %rax +; TOPDOWN-NEXT: cmpl $64, %eax +; TOPDOWN-NEXT: jne .LBB0_1 +; TOPDOWN-NEXT: # %bb.2: # %for.end +; TOPDOWN-NEXT: popq %rbx +; TOPDOWN-NEXT: .cfi_def_cfa_offset 32 +; TOPDOWN-NEXT: popq %r14 +; TOPDOWN-NEXT: .cfi_def_cfa_offset 24 +; TOPDOWN-NEXT: popq %r15 +; TOPDOWN-NEXT: .cfi_def_cfa_offset 16 +; TOPDOWN-NEXT: popq %rbp +; TOPDOWN-NEXT: .cfi_def_cfa_offset 8 +; TOPDOWN-NEXT: retq +; +; ILPMIN-LABEL: mmult: +; ILPMIN: # %bb.0: # %entry +; ILPMIN-NEXT: pushq %rbp +; ILPMIN-NEXT: .cfi_def_cfa_offset 16 +; ILPMIN-NEXT: pushq %r15 +; ILPMIN-NEXT: .cfi_def_cfa_offset 24 +; ILPMIN-NEXT: pushq %r14 +; ILPMIN-NEXT: .cfi_def_cfa_offset 32 +; ILPMIN-NEXT: pushq %rbx +; ILPMIN-NEXT: .cfi_def_cfa_offset 40 +; ILPMIN-NEXT: .cfi_offset %rbx, -40 +; ILPMIN-NEXT: .cfi_offset %r14, -32 +; ILPMIN-NEXT: .cfi_offset %r15, -24 +; ILPMIN-NEXT: .cfi_offset %rbp, -16 +; ILPMIN-NEXT: xorl %r15d, %r15d +; ILPMIN-NEXT: .p2align 4, 0x90 +; ILPMIN-NEXT: .LBB0_1: # %for.body +; ILPMIN-NEXT: # =>This Inner Loop Header: Depth=1 +; ILPMIN-NEXT: movl (%rdi,%r15), %r8d +; ILPMIN-NEXT: movl 4(%rdi,%r15), %r9d +; ILPMIN-NEXT: movl 12(%rdi,%r15), %r14d +; ILPMIN-NEXT: movl 8(%rdi,%r15), %r10d +; ILPMIN-NEXT: movl 32(%rsi), %ebx +; ILPMIN-NEXT: movl 48(%rsi), %r11d +; ILPMIN-NEXT: movl (%rsi), %ebp +; ILPMIN-NEXT: movl 16(%rsi), %ecx +; ILPMIN-NEXT: imull %r8d, %ebp +; ILPMIN-NEXT: imull %r9d, %ecx +; ILPMIN-NEXT: addl %ebp, %ecx +; ILPMIN-NEXT: imull %r10d, %ebx +; ILPMIN-NEXT: imull %r14d, %r11d +; ILPMIN-NEXT: addl %ebx, %r11d +; ILPMIN-NEXT: addl %ecx, %r11d +; ILPMIN-NEXT: movl %r11d, (%rdx,%r15) +; ILPMIN-NEXT: movl 4(%rsi), %ecx +; ILPMIN-NEXT: movl 20(%rsi), %ebp +; ILPMIN-NEXT: movl 52(%rsi), %ebx +; ILPMIN-NEXT: movl 36(%rsi), %eax +; ILPMIN-NEXT: imull %r8d, %ecx +; ILPMIN-NEXT: imull %r9d, %ebp +; ILPMIN-NEXT: addl %ecx, %ebp +; ILPMIN-NEXT: imull %r10d, %eax +; ILPMIN-NEXT: imull %r14d, %ebx +; ILPMIN-NEXT: addl %eax, %ebx +; ILPMIN-NEXT: addl %ebp, %ebx +; ILPMIN-NEXT: movl %ebx, 4(%rdx,%r15) +; ILPMIN-NEXT: movl 8(%rsi), %eax +; ILPMIN-NEXT: movl 24(%rsi), %ecx +; ILPMIN-NEXT: movl 56(%rsi), %ebp +; ILPMIN-NEXT: movl 40(%rsi), %ebx +; ILPMIN-NEXT: imull %r8d, %eax +; ILPMIN-NEXT: imull %r9d, %ecx +; ILPMIN-NEXT: addl %eax, %ecx +; ILPMIN-NEXT: imull %r10d, %ebx +; ILPMIN-NEXT: imull %r14d, %ebp +; ILPMIN-NEXT: addl %ebx, %ebp +; ILPMIN-NEXT: addl %ecx, %ebp +; ILPMIN-NEXT: movl %ebp, 8(%rdx,%r15) +; ILPMIN-NEXT: imull 12(%rsi), %r8d +; ILPMIN-NEXT: imull 28(%rsi), %r9d +; ILPMIN-NEXT: addl %r8d, %r9d +; ILPMIN-NEXT: imull 44(%rsi), %r10d +; ILPMIN-NEXT: imull 60(%rsi), %r14d +; ILPMIN-NEXT: addl %r10d, %r14d +; ILPMIN-NEXT: addl %r9d, %r14d +; ILPMIN-NEXT: movl %r14d, 12(%rdx,%r15) +; ILPMIN-NEXT: addq 
$16, %r15 +; ILPMIN-NEXT: cmpl $64, %r15d +; ILPMIN-NEXT: jne .LBB0_1 +; ILPMIN-NEXT: # %bb.2: # %for.end +; ILPMIN-NEXT: popq %rbx +; ILPMIN-NEXT: .cfi_def_cfa_offset 32 +; ILPMIN-NEXT: popq %r14 +; ILPMIN-NEXT: .cfi_def_cfa_offset 24 +; ILPMIN-NEXT: popq %r15 +; ILPMIN-NEXT: .cfi_def_cfa_offset 16 +; ILPMIN-NEXT: popq %rbp +; ILPMIN-NEXT: .cfi_def_cfa_offset 8 +; ILPMIN-NEXT: retq +; +; ILPMAX-LABEL: mmult: +; ILPMAX: # %bb.0: # %entry +; ILPMAX-NEXT: pushq %rbp +; ILPMAX-NEXT: .cfi_def_cfa_offset 16 +; ILPMAX-NEXT: pushq %r15 +; ILPMAX-NEXT: .cfi_def_cfa_offset 24 +; ILPMAX-NEXT: pushq %r14 +; ILPMAX-NEXT: .cfi_def_cfa_offset 32 +; ILPMAX-NEXT: pushq %rbx +; ILPMAX-NEXT: .cfi_def_cfa_offset 40 +; ILPMAX-NEXT: .cfi_offset %rbx, -40 +; ILPMAX-NEXT: .cfi_offset %r14, -32 +; ILPMAX-NEXT: .cfi_offset %r15, -24 +; ILPMAX-NEXT: .cfi_offset %rbp, -16 +; ILPMAX-NEXT: xorl %r15d, %r15d +; ILPMAX-NEXT: .p2align 4, 0x90 +; ILPMAX-NEXT: .LBB0_1: # %for.body +; ILPMAX-NEXT: # =>This Inner Loop Header: Depth=1 +; ILPMAX-NEXT: movl 12(%rdi,%r15), %r14d +; ILPMAX-NEXT: movl 8(%rdi,%r15), %r8d +; ILPMAX-NEXT: movl (%rdi,%r15), %r10d +; ILPMAX-NEXT: movl 4(%rdi,%r15), %r9d +; ILPMAX-NEXT: movl 32(%rsi), %ebx +; ILPMAX-NEXT: imull %r8d, %ebx +; ILPMAX-NEXT: movl 16(%rsi), %ebp +; ILPMAX-NEXT: imull %r9d, %ebp +; ILPMAX-NEXT: movl (%rsi), %ecx +; ILPMAX-NEXT: imull %r10d, %ecx +; ILPMAX-NEXT: movl 48(%rsi), %r11d +; ILPMAX-NEXT: imull %r14d, %r11d +; ILPMAX-NEXT: addl %ecx, %ebp +; ILPMAX-NEXT: addl %ebx, %r11d +; ILPMAX-NEXT: addl %ebp, %r11d +; ILPMAX-NEXT: movl %r11d, (%rdx,%r15) +; ILPMAX-NEXT: movl 20(%rsi), %ecx +; ILPMAX-NEXT: imull %r9d, %ecx +; ILPMAX-NEXT: movl 4(%rsi), %ebp +; ILPMAX-NEXT: imull %r10d, %ebp +; ILPMAX-NEXT: movl 36(%rsi), %ebx +; ILPMAX-NEXT: imull %r8d, %ebx +; ILPMAX-NEXT: movl 52(%rsi), %eax +; ILPMAX-NEXT: imull %r14d, %eax +; ILPMAX-NEXT: addl %ebp, %ecx +; ILPMAX-NEXT: addl %ebx, %eax +; ILPMAX-NEXT: addl %ecx, %eax +; ILPMAX-NEXT: movl %eax, 4(%rdx,%r15) +; ILPMAX-NEXT: movl 24(%rsi), %eax +; ILPMAX-NEXT: imull %r9d, %eax +; ILPMAX-NEXT: movl 8(%rsi), %ecx +; ILPMAX-NEXT: imull %r10d, %ecx +; ILPMAX-NEXT: movl 40(%rsi), %ebp +; ILPMAX-NEXT: imull %r8d, %ebp +; ILPMAX-NEXT: movl 56(%rsi), %ebx +; ILPMAX-NEXT: imull %r14d, %ebx +; ILPMAX-NEXT: addl %ecx, %eax +; ILPMAX-NEXT: addl %ebp, %ebx +; ILPMAX-NEXT: addl %eax, %ebx +; ILPMAX-NEXT: movl %ebx, 8(%rdx,%r15) +; ILPMAX-NEXT: imull 12(%rsi), %r10d +; ILPMAX-NEXT: imull 28(%rsi), %r9d +; ILPMAX-NEXT: imull 44(%rsi), %r8d +; ILPMAX-NEXT: imull 60(%rsi), %r14d +; ILPMAX-NEXT: addl %r10d, %r9d +; ILPMAX-NEXT: addl %r8d, %r14d +; ILPMAX-NEXT: addl %r9d, %r14d +; ILPMAX-NEXT: movl %r14d, 12(%rdx,%r15) +; ILPMAX-NEXT: addq $16, %r15 +; ILPMAX-NEXT: cmpl $64, %r15d +; ILPMAX-NEXT: jne .LBB0_1 +; ILPMAX-NEXT: # %bb.2: # %for.end +; ILPMAX-NEXT: popq %rbx +; ILPMAX-NEXT: .cfi_def_cfa_offset 32 +; ILPMAX-NEXT: popq %r14 +; ILPMAX-NEXT: .cfi_def_cfa_offset 24 +; ILPMAX-NEXT: popq %r15 +; ILPMAX-NEXT: .cfi_def_cfa_offset 16 +; ILPMAX-NEXT: popq %rbp +; ILPMAX-NEXT: .cfi_def_cfa_offset 8 +; ILPMAX-NEXT: retq [4 x i32]* noalias nocapture %m3) nounwind uwtable ssp { entry: br label %for.body Index: test/CodeGen/X86/mul-constant-i16.ll =================================================================== --- test/CodeGen/X86/mul-constant-i16.ll +++ test/CodeGen/X86/mul-constant-i16.ll @@ -559,10 +559,10 @@ define i16 @test_mul_by_29(i16 %x) { ; X86-LABEL: test_mul_by_29: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: 
leal (%ecx,%ecx,8), %eax -; X86-NEXT: leal (%eax,%eax,2), %eax -; X86-NEXT: addl %ecx, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,8), %ecx +; X86-NEXT: leal (%ecx,%ecx,2), %ecx +; X86-NEXT: addl %eax, %eax ; X86-NEXT: addl %ecx, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl @@ -572,7 +572,7 @@ ; X64-NEXT: # kill: def $edi killed $edi def $rdi ; X64-NEXT: leal (%rdi,%rdi,8), %eax ; X64-NEXT: leal (%rax,%rax,2), %eax -; X64-NEXT: addl %edi, %eax +; X64-NEXT: addl %edi, %edi ; X64-NEXT: addl %edi, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq Index: test/CodeGen/X86/mul-constant-i32.ll =================================================================== --- test/CodeGen/X86/mul-constant-i32.ll +++ test/CodeGen/X86/mul-constant-i32.ll @@ -901,10 +901,10 @@ define i32 @test_mul_by_29(i32 %x) { ; X86-LABEL: test_mul_by_29: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: leal (%ecx,%ecx,8), %eax -; X86-NEXT: leal (%eax,%eax,2), %eax -; X86-NEXT: addl %ecx, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,8), %ecx +; X86-NEXT: leal (%ecx,%ecx,2), %ecx +; X86-NEXT: addl %eax, %eax ; X86-NEXT: addl %ecx, %eax ; X86-NEXT: retl ; @@ -913,7 +913,7 @@ ; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi ; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax ; X64-HSW-NEXT: leal (%rax,%rax,2), %eax -; X64-HSW-NEXT: addl %edi, %eax +; X64-HSW-NEXT: addl %edi, %edi ; X64-HSW-NEXT: addl %edi, %eax ; X64-HSW-NEXT: retq ; @@ -921,9 +921,9 @@ ; X64-JAG: # %bb.0: ; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi ; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax +; X64-JAG-NEXT: addl %edi, %edi ; X64-JAG-NEXT: leal (%rax,%rax,2), %eax ; X64-JAG-NEXT: addl %edi, %eax -; X64-JAG-NEXT: addl %edi, %eax ; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_29: Index: test/CodeGen/X86/mul-constant-i64.ll =================================================================== --- test/CodeGen/X86/mul-constant-i64.ll +++ test/CodeGen/X86/mul-constant-i64.ll @@ -498,13 +498,18 @@ define i64 @test_mul_by_17(i64 %x) { ; X86-LABEL: test_mul_by_17: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: shll $4, %ecx -; X86-NEXT: addl %eax, %ecx +; X86-NEXT: pushl %esi +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %esi, -8 +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, %esi +; X86-NEXT: shll $4, %esi ; X86-NEXT: movl $17, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) +; X86-NEXT: addl %esi, %edx ; X86-NEXT: addl %ecx, %edx +; X86-NEXT: popl %esi +; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; ; X86-NOOPT-LABEL: test_mul_by_17: @@ -685,13 +690,18 @@ define i64 @test_mul_by_22(i64 %x) { ; X86-LABEL: test_mul_by_22: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: leal (%eax,%eax,4), %ecx -; X86-NEXT: leal (%eax,%ecx,4), %ecx -; X86-NEXT: addl %eax, %ecx +; X86-NEXT: pushl %esi +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %esi, -8 +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,4), %eax +; X86-NEXT: leal (%ecx,%eax,4), %esi ; X86-NEXT: movl $22, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) +; X86-NEXT: addl %esi, %edx ; X86-NEXT: addl %ecx, %edx +; X86-NEXT: popl %esi +; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; ; X86-NOOPT-LABEL: test_mul_by_22: @@ -844,13 +854,18 @@ define i64 @test_mul_by_26(i64 %x) { ; X86-LABEL: test_mul_by_26: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), 
%eax -; X86-NEXT: leal (%eax,%eax,4), %ecx -; X86-NEXT: leal (%ecx,%ecx,4), %ecx -; X86-NEXT: addl %eax, %ecx +; X86-NEXT: pushl %esi +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %esi, -8 +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,4), %eax +; X86-NEXT: leal (%eax,%eax,4), %esi ; X86-NEXT: movl $26, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) ; X86-NEXT: addl %ecx, %edx +; X86-NEXT: addl %esi, %edx +; X86-NEXT: popl %esi +; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; ; X86-NOOPT-LABEL: test_mul_by_26: @@ -924,13 +939,18 @@ define i64 @test_mul_by_28(i64 %x) { ; X86-LABEL: test_mul_by_28: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: leal (%eax,%eax,8), %ecx -; X86-NEXT: leal (%ecx,%ecx,2), %ecx -; X86-NEXT: addl %eax, %ecx +; X86-NEXT: pushl %esi +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %esi, -8 +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,8), %eax +; X86-NEXT: leal (%eax,%eax,2), %esi ; X86-NEXT: movl $28, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) ; X86-NEXT: addl %ecx, %edx +; X86-NEXT: addl %esi, %edx +; X86-NEXT: popl %esi +; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; ; X86-NOOPT-LABEL: test_mul_by_28: @@ -971,14 +991,19 @@ define i64 @test_mul_by_29(i64 %x) { ; X86-LABEL: test_mul_by_29: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: leal (%eax,%eax,8), %ecx -; X86-NEXT: leal (%ecx,%ecx,2), %ecx -; X86-NEXT: addl %eax, %ecx -; X86-NEXT: addl %eax, %ecx +; X86-NEXT: pushl %esi +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %esi, -8 +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,8), %eax +; X86-NEXT: leal (%eax,%eax,2), %esi +; X86-NEXT: addl %ecx, %ecx ; X86-NEXT: movl $29, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) ; X86-NEXT: addl %ecx, %edx +; X86-NEXT: addl %esi, %edx +; X86-NEXT: popl %esi +; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; ; X86-NOOPT-LABEL: test_mul_by_29: @@ -993,16 +1018,16 @@ ; X64-HSW: # %bb.0: ; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax ; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax -; X64-HSW-NEXT: addq %rdi, %rax +; X64-HSW-NEXT: addq %rdi, %rdi ; X64-HSW-NEXT: addq %rdi, %rax ; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_29: ; X64-JAG: # %bb.0: ; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax +; X64-JAG-NEXT: addq %rdi, %rdi ; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax ; X64-JAG-NEXT: addq %rdi, %rax -; X64-JAG-NEXT: addq %rdi, %rax ; X64-JAG-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_29: @@ -1498,8 +1523,8 @@ ; X86-NEXT: movl %esi, %eax ; X86-NEXT: mull %ecx ; X86-NEXT: imull %esi, %edi -; X86-NEXT: addl %edi, %edx ; X86-NEXT: imull %ebx, %ecx +; X86-NEXT: addl %edi, %ecx ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi @@ -1530,8 +1555,8 @@ ; X86-NOOPT-NEXT: movl %esi, %eax ; X86-NOOPT-NEXT: mull %ecx ; X86-NOOPT-NEXT: imull %esi, %edi -; X86-NOOPT-NEXT: addl %edi, %edx ; X86-NOOPT-NEXT: imull %ebx, %ecx +; X86-NOOPT-NEXT: addl %edi, %ecx ; X86-NOOPT-NEXT: addl %ecx, %edx ; X86-NOOPT-NEXT: popl %esi ; X86-NOOPT-NEXT: popl %edi Index: test/CodeGen/X86/mul-constant-i8.ll =================================================================== --- test/CodeGen/X86/mul-constant-i8.ll +++ test/CodeGen/X86/mul-constant-i8.ll @@ -340,7 +340,7 @@ ; X64-NEXT: # kill: def $edi killed $edi def $rdi ; X64-NEXT: leal (%rdi,%rdi,8), %eax ; X64-NEXT: leal (%rax,%rax,2), %eax -; X64-NEXT: addl %edi, %eax +; X64-NEXT: addl %edi, %edi ; X64-NEXT: addl %edi, %eax ; X64-NEXT: # kill: def $al killed 
$al killed $eax ; X64-NEXT: retq Index: test/CodeGen/X86/mul-i1024.ll =================================================================== --- test/CodeGen/X86/mul-i1024.ll +++ test/CodeGen/X86/mul-i1024.ll @@ -51,7 +51,7 @@ ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, %eax ; X32-NEXT: adcl $0, %eax -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, (%esp) # 4-byte Spill ; X32-NEXT: movl 36(%esi), %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: mull %edi @@ -121,7 +121,7 @@ ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl %eax, (%esp) # 4-byte Folded Spill ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: movl 4(%esi), %eax @@ -140,7 +140,7 @@ ; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: setb %cl ; X32-NEXT: addl %eax, %edi -; X32-NEXT: movl %edi, (%esp) # 4-byte Spill +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movzbl %cl, %eax ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: movl %eax, %ebx @@ -459,9 +459,9 @@ ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %edi, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl (%esp), %edi # 4-byte Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edi, (%esp) # 4-byte Spill ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, %ebx @@ -954,9 +954,9 @@ ; X32-NEXT: addl %ecx, %ebx ; X32-NEXT: adcl %esi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: movl (%esp), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: addl %ebp, %edi -; X32-NEXT: movl %edi, (%esp) # 4-byte Spill +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -972,7 +972,7 @@ ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl (%esp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: adcl $0, %edx @@ -981,7 +981,7 @@ ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: setb %dl ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movzbl %dl, %edx ; X32-NEXT: adcl %ebx, %edx @@ -994,7 +994,7 @@ ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT: addl (%esp), %ebp # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -1020,32 +1020,34 @@ ; X32-NEXT: addl %eax, %edx ; X32-NEXT: movzbl %bl, %eax ; X32-NEXT: adcl %ecx, %eax +; X32-NEXT: movl %edi, %ecx ; X32-NEXT: addl %edi, %edx ; X32-NEXT: adcl %esi, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: addl %ebp, %eax +; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: addl %eax, %edi -; X32-NEXT: movl %edi, (%esp) # 4-byte Spill +; X32-NEXT: movl %ecx, %ebp +; X32-NEXT: addl %eax, %ebp +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: adcl %ebx, %ecx ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, %esi ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %edi -; X32-NEXT: adcl $0, %edi -; X32-NEXT: addl %ebp, (%esp) # 4-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ecx, %ebp +; X32-NEXT: adcl $0, %ebp +; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %eax ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: addl %esi, %eax -; X32-NEXT: adcl %edi, %ebx +; X32-NEXT: adcl %ebp, %ebx ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload @@ -1070,7 +1072,7 @@ ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: addb $255, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload @@ -1186,7 +1188,7 @@ ; X32-NEXT: adcl %esi, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl (%esp), %eax # 4-byte Reload ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload @@ -1209,7 +1211,7 @@ ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %dl ; 
X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, (%esp) # 4-byte Spill ; X32-NEXT: adcl %ebx, %ecx ; X32-NEXT: movzbl %dl, %eax ; X32-NEXT: adcl %ebp, %eax @@ -1223,7 +1225,7 @@ ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: addl (%esp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: adcl %eax, %edi ; X32-NEXT: adcl %ebp, %esi @@ -1237,7 +1239,7 @@ ; X32-NEXT: adcl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: addb $255, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: adcl (%esp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -1249,12 +1251,12 @@ ; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edx, %ebp +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 60(%eax), %esi ; X32-NEXT: movl %esi, %eax @@ -1262,9 +1264,9 @@ ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ebp, %ebx +; X32-NEXT: addl %ecx, %ebx ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebp @@ -1275,8 +1277,8 @@ ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: addl %ebp, %eax -; X32-NEXT: movzbl %bl, %edi -; X32-NEXT: adcl %edi, %edx +; X32-NEXT: movzbl %bl, %ecx +; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload @@ -1284,7 +1286,7 @@ ; X32-NEXT: addl %eax, %ecx ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %esi, (%esp) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload @@ -1299,90 +1301,91 @@ ; X32-NEXT: addl %edi, %ebp ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %ebx, %edi -; X32-NEXT: setb %bl +; X32-NEXT: adcl %ebx, %ecx +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl %esi, %ebp ; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %ecx -; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl %edi, %ebp -; X32-NEXT: movzbl %bl, %eax +; X32-NEXT: mull %edi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ecx, %edi +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %ecx, %ebp ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, %edi -; X32-NEXT: adcl %esi, %ebx -; X32-NEXT: setb (%esp) # 1-byte Folded Spill +; X32-NEXT: movl %edx, %edi +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %esi, %edi +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl (%esp), %ecx # 1-byte Folded Reload +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT: addl %eax, %ecx -; X32-NEXT: adcl %edx, %esi -; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: addl %eax, %ebp +; X32-NEXT: adcl %edx, %ecx +; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 
4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, %ebp ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: adcl $0, %esi -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl (%esp), %ecx # 4-byte Folded Reload ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ebp +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, (%esp) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ebp, %edi +; X32-NEXT: addl (%esp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: mull %ebp +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl (%esp), %ebx # 4-byte Folded Reload -; X32-NEXT: setb (%esp) # 1-byte Folded Spill +; X32-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %ebp +; X32-NEXT: mull %esi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl (%esp), %edi # 1-byte Folded Reload -; X32-NEXT: adcl %edi, %edx +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload +; X32-NEXT: adcl %esi, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload @@ -1390,16 +1393,16 @@ ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ebx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: adcl %esi, %ecx +; X32-NEXT: addl %ebp, %edx +; X32-NEXT: movl (%esp), %esi # 4-byte Reload +; X32-NEXT: adcl %ecx, %esi ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, (%esp) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded 
Reload @@ -1410,59 +1413,59 @@ ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebp ; X32-NEXT: addl %esi, %ebp ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edi, %esi -; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %ecx +; X32-NEXT: setb %cl +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %ebx ; X32-NEXT: addl %esi, %eax -; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: mull %edi +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl %esi, %ebp +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %edi, %esi +; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl %esi, %edi +; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl %esi, %ebp +; X32-NEXT: addl %ecx, %ebp ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload @@ -1470,31 +1473,31 @@ ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill ; 
X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ecx, %ebx -; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %edi, %eax +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %esi, %edi +; X32-NEXT: adcl %edi, %esi ; X32-NEXT: setb %bl ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: addl %edi, %eax +; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload @@ -1508,9 +1511,9 @@ ; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl (%esp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: setb (%esp) # 1-byte Folded Spill +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload @@ -1546,7 +1549,7 @@ ; X32-NEXT: addl %ebx, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: adcl %ecx, %esi -; X32-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %ebp ; X32-NEXT: adcl $0, %edi ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload @@ -1562,7 +1565,7 @@ ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload @@ -1583,7 +1586,7 @@ ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edi, %esi ; X32-NEXT: setb %cl ; X32-NEXT: movl %ebp, %eax @@ -1629,7 +1632,7 @@ ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded 
Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT: adcl (%esp), %edx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill @@ -1653,7 +1656,7 @@ ; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %ebp, %ebx ; X32-NEXT: setb %cl ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload @@ -1668,33 +1671,33 @@ ; X32-NEXT: addl %eax, %ebp ; X32-NEXT: adcl %edx, %ecx ; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %ebp ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edi, %eax +; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: adcl %edi, %edx +; X32-NEXT: movl %edx, %edi ; X32-NEXT: setb %bl ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi @@ -1725,43 +1728,42 @@ ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, (%esp) # 4-byte Folded Spill +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %edx ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %edi ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: adcl (%esp), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl 
{{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: mull %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, %esi -; X32-NEXT: addl %ecx, %esi +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %esi, %eax +; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %ebx, %ecx ; X32-NEXT: setb %bl -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: mull %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %esi ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx @@ -1770,32 +1772,33 @@ ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: movl %ebp, %eax +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %esi, %ebx +; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ecx, %ebx -; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: mull %ebp -; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %esi, %ecx +; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %ebp +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl %ebp, %edi +; X32-NEXT: mull 
%esi ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: addl %ecx, %ebx ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload @@ -1805,30 +1808,30 @@ ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl $0, %esi +; X32-NEXT: addl %esi, %ecx +; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %esi, %ebp +; X32-NEXT: adcl %edi, %ebp ; X32-NEXT: setb %cl ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: mull %esi ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx @@ -1843,33 +1846,33 @@ ; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ebp ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: addl (%esp), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, (%esp) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: addl (%esp), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, (%esp) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: adcl (%esp), %ebx # 4-byte Folded Reload +; X32-NEXT: setb (%esp) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte 
Folded Reload +; X32-NEXT: movzbl (%esp), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %esi, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload @@ -1923,34 +1926,34 @@ ; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl 28(%eax), %ecx -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl 28(%eax), %esi +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %esi, %ebx +; X32-NEXT: addl %ebp, %ebx ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edi, %ebp ; X32-NEXT: setb %bl -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: mull %esi +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %ecx ; X32-NEXT: addl %ebp, %eax -; X32-NEXT: movzbl %bl, %edi -; X32-NEXT: adcl %edi, %edx +; X32-NEXT: movzbl %bl, %ecx +; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload @@ -1958,7 +1961,7 @@ ; X32-NEXT: addl %eax, %ecx ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %esi, (%esp) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload @@ -1973,108 +1976,108 @@ ; X32-NEXT: addl %edi, %ebp ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %ebx, %edi +; X32-NEXT: adcl %ebx, %ecx ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl %esi, %ebx +; X32-NEXT: movl %esi, %ebp ; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %ecx -; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl %edi, %ebp +; X32-NEXT: mull %edi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %ecx, %ebx ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: addl 
{{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax +; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %ecx, %ebp ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, %edi -; X32-NEXT: adcl %esi, %ebx +; X32-NEXT: movl %edx, %edi +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %esi, %edi ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: addl %ebx, %eax +; X32-NEXT: addl %edi, %eax ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT: addl %eax, %ecx -; X32-NEXT: adcl %edx, %esi -; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: addl %eax, %ebp +; X32-NEXT: adcl %edx, %ecx +; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, %ebp ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: adcl $0, %esi -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl (%esp), %ecx # 4-byte Folded Reload ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax -; 
X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ebp -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, (%esp) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ebp, %edi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: mull %ebp +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %ebp +; X32-NEXT: mull %esi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload -; X32-NEXT: adcl %edi, %edx +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload +; X32-NEXT: adcl %esi, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ebx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: adcl %esi, %ecx +; X32-NEXT: movl (%esp), %edx # 4-byte Reload +; X32-NEXT: addl %ebp, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl %ecx, %esi ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload @@ -2092,18 +2095,18 @@ ; X32-NEXT: addl %esi, %ebx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edi, %esi -; X32-NEXT: setb %bl +; X32-NEXT: setb %cl ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %ecx +; X32-NEXT: mull 
%ebp ; X32-NEXT: addl %esi, %eax -; X32-NEXT: movzbl %bl, %esi -; X32-NEXT: adcl %esi, %edx +; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload @@ -2126,17 +2129,18 @@ ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %edi, %esi +; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl %ebp, %edi ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: mull %ecx +; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %esi, %ebx +; X32-NEXT: addl %ecx, %ebx ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload @@ -2146,44 +2150,44 @@ ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %edi, %eax +; X32-NEXT: mull %ebp +; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebp ; X32-NEXT: addl %ecx, %ebp -; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %edi, %eax +; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %esi, %edi +; X32-NEXT: adcl %edi, %esi ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: addl %edi, %eax +; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ebp -; X32-NEXT: adcl %edx, %edi +; X32-NEXT: adcl %edx, %ecx ; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; 
X32-NEXT: adcl $0, %ebp -; X32-NEXT: adcl $0, %edi +; X32-NEXT: adcl $0, %ecx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax @@ -2193,49 +2197,52 @@ ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: addl %ecx, %eax +; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: setb %cl +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl %cl, %esi -; X32-NEXT: adcl %esi, %edx +; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %eax, %edx +; X32-NEXT: addl %ebx, %edx +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload +; X32-NEXT: adcl %esi, %edi ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: addl %eax, %esi -; X32-NEXT: adcl %edx, %ebx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: addl %ebp, %ecx +; X32-NEXT: addl %edx, %esi +; X32-NEXT: adcl %edi, %ebx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: addl %ebp, %edi ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NEXT: adcl %edi, %edx +; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %esi ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %edi, %eax +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: movl %ebx, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill @@ -2245,23 +2252,22 @@ ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl %esi, %ecx -; X32-NEXT: adcl $0, %edi +; X32-NEXT: adcl $0, %ebx ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: mull %ebp +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %edi, %esi +; X32-NEXT: adcl %ebx, %esi ; X32-NEXT: setb %cl -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %ebp +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx @@ -2273,30 +2279,29 @@ ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ecx, %edi +; X32-NEXT: addl %ebp, %edi ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: mull %ebx +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl %ebp, %esi -; X32-NEXT: mull %ebx +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %ebp ; X32-NEXT: movl %eax, %edi ; X32-NEXT: addl %ecx, %edi ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload @@ -2308,19 +2313,19 @@ ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %ebp +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %ebp, %ecx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ebp @@ -2347,25 +2352,25 @@ ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebp ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edi, %eax +; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: adcl %edi, %edx +; X32-NEXT: movl %edx, %edi ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi @@ -2401,7 +2406,7 @@ ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %edi ; X32-NEXT: adcl $0, %ebp -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: addl (%esp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -2416,26 +2421,24 @@ ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebp -; X32-NEXT: movl %eax, %esi -; X32-NEXT: addl %ecx, %esi +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl $0, %ebp ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: mull %ebx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %esi, %eax +; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %ebp, %ecx -; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 
1-byte Folded Spill -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl %ebx, %edi -; X32-NEXT: mull %ebx +; X32-NEXT: setb %bl +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %esi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload @@ -2444,32 +2447,34 @@ ; X32-NEXT: addl %eax, %ecx ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %esi, (%esp) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebp ; X32-NEXT: addl %ecx, %ebp -; X32-NEXT: adcl $0, %esi +; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb %bl -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl %edi, %ecx +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl %esi, %edi ; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %edi +; X32-NEXT: mull %ebx ; X32-NEXT: movl %eax, %ebp ; X32-NEXT: addl %ecx, %ebp -; X32-NEXT: movzbl %bl, %eax +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload @@ -2477,29 +2482,29 @@ ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax +; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl %ebx, %ecx -; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte 
Reload
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ebx
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ebx
+; X32-NEXT: adcl %edi, %ebx
; X32-NEXT: setb %cl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: mull %esi
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
@@ -2515,18 +2520,18 @@
; X32-NEXT: adcl $0, %ebx
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl (%esp), %ecx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: addl (%esp), %edi # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %ebp, %eax
@@ -2534,7 +2539,7 @@
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
@@ -2550,7 +2555,7 @@
; X32-NEXT: adcl %edx, %ebp
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: addl %ebx, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl (%esp), %esi # 4-byte Reload
; X32-NEXT: adcl %ecx, %esi
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edi
@@ -2558,7 +2563,7 @@
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, %ebx
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, (%esp) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
@@ -2573,7 +2578,7 @@
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %ebx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl (%esp), %eax # 4-byte Reload
; X32-NEXT: adcl $0, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X32-NEXT: adcl $0, %ebp
@@ -2584,7 +2589,7 @@
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
@@ -2598,12 +2603,12 @@
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl (%esp), %ecx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl (%esp), %eax # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
@@ -2631,7 +2636,7 @@
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edi, %ebp
; X32-NEXT: setb %bl
; X32-NEXT: movl %esi, %eax
@@ -2647,30 +2652,30 @@
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: mull %ebx
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %ebx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %ebx
; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: addl %edi, %ebp
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %ebx, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl %esi, %ebp
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %edi
+; X32-NEXT: movl %edi, %ebp
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %esi
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: addl %ecx, %ebx
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
@@ -2678,11 +2683,12 @@
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl (%esp), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
@@ -2693,7 +2699,7 @@
; X32-NEXT: movl %eax, %ebp
; X32-NEXT: addl %ecx, %ebp
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
@@ -2713,13 +2719,13 @@
; X32-NEXT: addl %eax, %ebp
; X32-NEXT: adcl %edx, %ecx
; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl (%esp), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, %ebp
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: setb (%esp) # 1-byte Folded Spill
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
@@ -2755,7 +2761,7 @@
; X32-NEXT: addl %ebp, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl %ecx, %esi
-; X32-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edi
; X32-NEXT: adcl $0, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
@@ -2766,32 +2772,31 @@
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %ecx
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl (%esp), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebp
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ebp
+; X32-NEXT: adcl %edi, %ecx
; X32-NEXT: setb %bl
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %ecx
-; X32-NEXT: addl %ebp, %eax
-; X32-NEXT: movzbl %bl, %edi
-; X32-NEXT: adcl %edi, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: mull %esi
+; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movzbl %bl, %ecx
+; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
@@ -2799,33 +2804,33 @@
; X32-NEXT: addl %eax, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, (%esp) # 4-byte Spill
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: addl %edi, %ebp
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %edi
-; X32-NEXT: setb %cl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %esi
+; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: addl %edi, %ebp
-; X32-NEXT: movzbl %cl, %eax
+; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
@@ -2833,125 +2838,125 @@
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: adcl %esi, %ebx
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %esi, %edi
+; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movzbl %bl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: addl %eax, %esi
-; X32-NEXT: adcl %edx, %ecx
+; X32-NEXT: adcl %edx, %ebx
; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: adcl $0, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: adcl (%esp), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %edx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ebp
-; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
-; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
; X32-NEXT: addl %ebp, %eax
-; X32-NEXT: movzbl %bl, %edi
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edi, %edx
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %edi
-; X32-NEXT: adcl %edx, %ebx
-; X32-NEXT: movl (%esp), %edx # 4-byte Reload
-; X32-NEXT: addl %esi, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl %ecx, %esi
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: addl %eax, %ebp
+; X32-NEXT: adcl %edx, %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: addl %esi, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NEXT: adcl %ebx, %edx
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
-; X32-NEXT: adcl %eax, %edi
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: adcl %eax, %ebp
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %edi, %ecx
-; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: adcl $0, %edi
; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %edi
-; X32-NEXT: setb %cl
-; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: mull %esi
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
@@ -2961,8 +2966,8 @@
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %ecx
@@ -2971,97 +2976,96 @@
; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: mull %ebp
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edi, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl %esi, %edi
-; X32-NEXT: mull %ebp
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %ebp, %ecx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edi
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl 60(%eax), %ebp
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %ebp
-; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl 60(%eax), %esi
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: adcl %edi, %ebx
; X32-NEXT: setb %cl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ebp
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: mull %esi
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %ebp
-; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: addl %eax, %ecx
+; X32-NEXT: adcl %edx, %esi
+; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, %ebp
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
-; X32-NEXT: adcl %esi, %edx
+; X32-NEXT: mull %edi
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movzbl %bl, %edi
+; X32-NEXT: adcl %edi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
@@ -3069,17 +3073,17 @@
; X32-NEXT: addl %eax, %edi
; X32-NEXT: adcl %edx, %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: addl %ebp, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl %ecx, %esi
+; X32-NEXT: addl %ecx, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: adcl %esi, %ecx
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edi
; X32-NEXT: adcl $0, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl (%esp), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
@@ -3088,43 +3092,43 @@
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: adcl $0, %esi
+; X32-NEXT: adcl $0, %ecx
; X32-NEXT: adcl $0, %edi
; X32-NEXT: adcl $0, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: addl %ecx, %esi
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: mull %ebx
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %esi, %ecx
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebp, %edi
+; X32-NEXT: adcl %ebx, %esi
; X32-NEXT: setb %cl
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %edi
+; X32-NEXT: addl %esi, %eax
; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
@@ -3134,125 +3138,125 @@
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: mull %ebx
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: mull %ebx
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %ebp
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl (%esp), %edi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %ebx, %ecx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: mull %ebx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ebp
; X32-NEXT: setb %cl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ebx
+; X32-NEXT: mull %edi
; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %ebx
-; X32-NEXT: adcl %edx, %ebp
-; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl (%esp), %eax # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: addl %eax, %ebp
+; X32-NEXT: adcl %edx, %ecx
+; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, %ebx
; X32-NEXT: adcl $0, %ebp
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl (%esp), %edi # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %edx
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %edx
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: setb (%esp) # 1-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movzbl (%esp), %ecx # 1-byte Folded Reload
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: mull %esi
+; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
+; X32-NEXT: adcl %esi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %esi
-; X32-NEXT: adcl %edx, %edi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: addl %ebx, %ecx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: addl %eax, %edi
+; X32-NEXT: adcl %edx, %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: adcl %ebp, %edx
+; X32-NEXT: addl %ebp, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: adcl %ecx, %esi
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
-; X32-NEXT: adcl %eax, %esi
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, %ebx
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: adcl %eax, %edi
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, %ebp
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, (%esp) # 4-byte Spill
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
@@ -3262,51 +3266,48 @@
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
-; X32-NEXT: adcl %eax, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, %ebp
-; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill
+; X32-NEXT: adcl %eax, %ebp
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl $0, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
-; X32-NEXT: movl (%esp), %eax # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl (%esp), %ebx # 4-byte Reload
+; X32-NEXT: adcl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: adcl $0, %edx
; X32-NEXT: adcl $0, %esi
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl $0, %eax
; X32-NEXT: adcl $0, %ebp
-; X32-NEXT: movl (%esp), %ebx # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: adcl $0, %ebx
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, %eax
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
@@ -3315,13 +3316,13 @@
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl %ebx, (%esp) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
@@ -3356,42 +3357,43 @@
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: addl %eax, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %ecx
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ebx
; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: addl %edi, %ebp
; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %ebx, %ecx
-; X32-NEXT: setb %bl
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %esi, %ebp
; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %edi
-; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: addl %ecx, %ebp
-; X32-NEXT: movzbl %bl, %eax
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: movl %edi, %eax
@@ -3399,99 +3401,99 @@
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %edi
-; X32-NEXT: setb %bl
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movzbl %bl, %ecx
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %ebx
+; X32-NEXT: addl %eax, %ebp
; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: addl (%esp), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: addl (%esp), %edi # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
; X32-NEXT: adcl %esi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: addl %eax, %edi
-; X32-NEXT: adcl %edx, %ebp
+; X32-NEXT: adcl %edx, %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: addl %ebx, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: addl %ebp, %edx
+; X32-NEXT: movl (%esp), %esi # 4-byte Reload
; X32-NEXT: adcl %ecx, %esi
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edi
-; X32-NEXT: adcl $0, %ebp
+; X32-NEXT: adcl $0, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, (%esp) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
-; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %ecx
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: addl %ebp, %ebx
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %ebp
@@ -3532,17 +3534,16 @@
; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %ebx, %ecx
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl %esi, %ebp
+; X32-NEXT: setb %bl
; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: movzbl %bl, %eax
; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
@@ -3553,24 +3554,24 @@
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: addl %ebp, %eax
-; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %edi
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X32-NEXT: movzbl %bl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
@@ -3578,9 +3579,9 @@
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: addl %eax, %ecx
; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
-; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: adcl $0, %esi
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
@@ -3637,33 +3638,33 @@
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %edi, %ecx
-; X32-NEXT: adcl $0, %ebp
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: addl %esi, %ecx
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: mull %ebp
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebp, %edi
+; X32-NEXT: adcl %ebx, %esi
; X32-NEXT: setb %cl
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %ebp
+; X32-NEXT: addl %esi, %eax
; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
@@ -3674,121 +3675,122 @@
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: addl %ecx, %ebp
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: mull %ebx
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ebp, %ebx
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: adcl %esi, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %ebx
-; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl %edi, %esi
+; X32-NEXT: mull %ebp
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %ebx, %ecx
-; X32-NEXT: adcl $0, %edi
; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: mull %ebx
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %ebp, %ecx
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: mull %ebp
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: adcl %esi, %edi
; X32-NEXT: setb %cl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ebx
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: mull %ebp
+; X32-NEXT: addl %edi, %eax
; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %ebx
+; X32-NEXT: addl %eax, %ebp
; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %edx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: adcl %edi, %edx
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
+; X32-NEXT: movzbl %bl, %esi
; X32-NEXT: adcl %esi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: addl %eax, %edi
-; X32-NEXT: adcl %edx, %ebp
+; X32-NEXT: adcl %edx, %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: addl %ebx, %edx
+; X32-NEXT: addl %ebp, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl %ecx, %esi
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edi
-; X32-NEXT: adcl $0, %ebp
+; X32-NEXT: adcl $0, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
@@ -3800,41 +3802,41 @@
; X32-NEXT: adcl $0, %edx
; X32-NEXT: adcl $0, %esi
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: adcl $0, %ebp
+; X32-NEXT: adcl $0, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: adcl (%esp), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
-; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ebp
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: addl %ecx, %esi
-; X32-NEXT: adcl $0, %ebp
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebp, %ecx
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %ebx
+; X32-NEXT: adcl %ebx, %ecx
+; X32-NEXT: setb %bl
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %esi
; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X32-NEXT: movzbl %bl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
@@ -3844,63 +3846,63 @@
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: addl %ecx, %ebp
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: mull %ebx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: 
mull %ebp ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebp, %eax +; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %esi, %ecx +; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %ebx -; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl %ecx, %ebp +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %ebp +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %ecx, %ebx ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: addl (%esp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ebx +; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %esi, %ebx +; X32-NEXT: adcl %edi, %ebp ; X32-NEXT: setb %cl ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: addl %ebx, %eax +; X32-NEXT: mull %esi +; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload @@ -3909,7 +3911,7 @@ ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: addl %ebx, (%esp) # 4-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %esi @@ -3967,7 +3969,7 @@ ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl (%esp), %esi # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; 
X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload @@ -4002,7 +4004,7 @@ ; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill @@ -4011,7 +4013,7 @@ ; X32-NEXT: adcl $0, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %esi, (%esp) # 4-byte Spill ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %ecx @@ -4055,19 +4057,20 @@ ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl %esi, %ebp +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx @@ -4075,8 +4078,7 @@ ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %ebx, %ecx ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: addl %ecx, %ebx @@ -4089,32 +4091,32 @@ ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl %ecx, %ebp -; X32-NEXT: adcl $0, %esi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl 76(%eax), %ecx -; X32-NEXT: 
movl %edi, %eax -; X32-NEXT: mull %ecx -; X32-NEXT: movl %ecx, %edi -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl 76(%eax), %esi +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: mull %esi +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb (%esp) # 1-byte Folded Spill +; X32-NEXT: adcl %edi, %ecx +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: mull %esi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl (%esp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload @@ -4141,19 +4143,19 @@ ; X32-NEXT: movl %eax, %edi ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl (%esp), %ebx # 4-byte Folded Reload -; X32-NEXT: setb (%esp) # 1-byte Folded Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl (%esp), %esi # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %esi, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload @@ -4207,56 +4209,57 @@ ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl %edi, %ebp +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %ebx, %edi -; X32-NEXT: setb %bl -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl %esi, %ebx ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %ebp ; X32-NEXT: addl %edi, %ebp -; X32-NEXT: movzbl %bl, %eax +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %edi +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ecx, %edi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx @@ -4280,7 +4283,7 @@ ; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl (%esp), %esi # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload @@ -4288,7 +4291,7 @@ ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, %ebx @@ -4314,7 +4317,7 @@ ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ebx -; X32-NEXT: movl (%esp), %edx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: addl %esi, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: adcl %ecx, %esi @@ -4326,7 +4329,7 @@ ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-NEXT: movl %edx, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload @@ -4372,26 +4375,26 @@ ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edx, %edi ; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: addl %ecx, %ebx ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, %ebx @@ -4488,7 +4491,7 @@ ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill @@ -4529,11 +4532,11 @@ ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb (%esp) # 1-byte Folded Spill +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %ebp ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl (%esp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload @@ -4548,7 +4551,7 @@ ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %esi @@ -4662,7 +4665,7 @@ ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: addl %eax, (%esp) # 4-byte Folded Spill +; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: 
movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload @@ -4716,64 +4719,64 @@ ; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl %ecx, %ebp +; X32-NEXT: addl %esi, %ebp ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %ebx, %edi ; X32-NEXT: setb %bl -; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %ecx +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movzbl %bl, %edi ; X32-NEXT: adcl %edi, %edx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: addl %eax, %edi -; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: addl %eax, %ecx +; X32-NEXT: adcl %edx, %esi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: mull %ebp +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %ebp +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi ; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: mull %ebx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, %edi ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded 
Spill +; X32-NEXT: setb %bl ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %ebx +; X32-NEXT: mull %ebp ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload @@ -4794,8 +4797,8 @@ ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl %ebp, %edi -; X32-NEXT: mull %ebp +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload @@ -4840,31 +4843,28 @@ ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: addl %edi, %edx ; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: addl %edi, %ecx ; X32-NEXT: addl %edx, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ecx, %edi ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %esi +; X32-NEXT: movl %eax, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: imull %ebx, %esi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: addl %esi, %edx +; X32-NEXT: imull %ebx, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: imull %edi, %ecx +; X32-NEXT: imull %esi, %ecx +; X32-NEXT: addl %edx, %ecx +; X32-NEXT: mull %esi ; X32-NEXT: addl %edx, %ecx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl %edi, %esi +; X32-NEXT: adcl %edi, %ecx +; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl %ebx, %ecx ; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ebp ; X32-NEXT: movl %eax, %ebx @@ -4878,14 +4878,14 @@ ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %ebp, %esi ; X32-NEXT: setb %bl -; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: addl %esi, %eax -; X32-NEXT: movzbl %bl, %ecx -; X32-NEXT: adcl %ecx, %edx +; X32-NEXT: movzbl %bl, %esi +; X32-NEXT: adcl %esi, %edx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload @@ -4894,51 +4894,48 @@ ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: addl %esi, %edx -; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: imull %edi, %ecx +; X32-NEXT: addl %esi, %ecx ; X32-NEXT: addl %edx, %ecx ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: imull %edi, %ebx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %eax, %esi -; X32-NEXT: addl %ebx, %edx -; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl %eax, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: imull %esi, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: imull %eax, %ecx +; X32-NEXT: imull %ebx, %ecx ; X32-NEXT: addl %edx, %ecx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: mull %ebx +; X32-NEXT: addl %edx, %ecx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %eax, %esi +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %ebp -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl %ecx, %ebp +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi ; X32-NEXT: addl %ebp, %edi ; X32-NEXT: adcl %ebx, %esi ; X32-NEXT: setb %bl ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %ecx +; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; X32-NEXT: addl %esi, %eax -; X32-NEXT: movzbl %bl, %ecx -; X32-NEXT: adcl %ecx, %edx +; X32-NEXT: movzbl %bl, %esi +; X32-NEXT: adcl %esi, %edx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload @@ -4952,37 +4949,38 @@ ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: movl 104(%esi), %ebp +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl 104(%ecx), %ebp ; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte 
Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl 108(%esi), %eax -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl 108(%ecx), %ecx +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ecx, %ebx -; X32-NEXT: adcl $0, %esi +; X32-NEXT: addl %esi, %ebx +; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %esi, %ecx +; X32-NEXT: adcl %edi, %ebp ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: addl %ebp, %ebx ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X32-NEXT: adcl %eax, %esi -; X32-NEXT: movl %ebp, %eax +; X32-NEXT: adcl %eax, %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -4991,122 +4989,125 @@ ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %esi, %edx +; X32-NEXT: adcl %edi, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi -; X32-NEXT: movl 96(%edi), %ebx -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl 96(%ecx), %edi +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl 100(%edi), %edi -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl 100(%ecx), %ecx +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl %ecx, %ebp -; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: mull %ebx -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl $0, %ebx +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl 
%esi, %ecx -; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: adcl %ebx, %edi +; X32-NEXT: setb %bl +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: movl %eax, %esi +; X32-NEXT: addl %edi, %esi +; X32-NEXT: movzbl %bl, %eax +; X32-NEXT: adcl %eax, %ebp +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %ebx -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ecx, %edi -; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X32-NEXT: adcl %eax, %ebx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %eax, %ebp +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: addl %eax, %ecx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %edx, %eax -; X32-NEXT: addl %edi, %ecx -; X32-NEXT: adcl %ebx, %eax +; X32-NEXT: addl %esi, %ecx +; X32-NEXT: adcl %ebp, %eax ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %edi, %ebp +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: addl %esi, %ecx ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %esi -; X32-NEXT: addl %ebx, %eax +; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edi, %esi -; X32-NEXT: setb %bl -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %ecx +; X32-NEXT: setb %cl +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %ebp ; X32-NEXT: addl %esi, %eax -; X32-NEXT: movzbl %bl, %ecx +; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: addl %eax, %ebp +; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx ; X32-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: adcl $0, %ebp +; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ebp -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ebp, %ebx -; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: mull %ebp +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb %bl -; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %ebp +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %ecx, %ebp +; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: adcl %esi, %ecx +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %edi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl %bl, %ecx +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload @@ -5116,128 +5117,122 @@ ; X32-NEXT: adcl %edx, %ecx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %esi ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: movl 112(%esi), %edi -; X32-NEXT: imull %edi, %ebp -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %eax, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: addl %ebp, %edx -; X32-NEXT: movl 116(%esi), %eax -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: imull %eax, %ecx -; X32-NEXT: addl %edx, %ecx -; X32-NEXT: movl %ecx, %ebx -; X32-NEXT: movl 120(%esi), %eax -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X32-NEXT: movl 112(%ebx), %esi +; X32-NEXT: movl %edi, %ecx ; X32-NEXT: imull %esi, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: mull %ebp -; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl 124(%ecx), %ecx -; X32-NEXT: imull %ebp, %ecx -; X32-NEXT: addl %edx, %ecx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %ebx, %ecx -; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ebp ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl 116(%ebx), %ebp +; X32-NEXT: imull %ebp, %edi +; X32-NEXT: addl %ecx, %edi +; X32-NEXT: addl %edx, %edi +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl 120(%ebx), %eax +; X32-NEXT: movl %eax, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: imull %ecx, %edx +; X32-NEXT: movl 124(%ebx), %ebx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: imull %edi, %ebx +; X32-NEXT: addl %edx, %ebx ; X32-NEXT: mull %edi +; X32-NEXT: addl %edx, %ebx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %edi, %eax +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ebp, %ebx +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ebp -; X32-NEXT: addl %ebx, %eax +; X32-NEXT: mull %ebp +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %esi, %ebp -; X32-NEXT: setb %bl +; X32-NEXT: adcl %esi, %ecx +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: addl %ebp, %eax -; X32-NEXT: movzbl %bl, %esi -; X32-NEXT: adcl %esi, %edx +; X32-NEXT: mull %ebp +; X32-NEXT: addl %ecx, %eax +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %ecx, %edx +; X32-NEXT: adcl %ebx, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload 
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: imull %eax, %esi ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: mull %ebx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: imull %ebp, %ebx +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: addl %esi, %edx -; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: addl %edx, %ebx +; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: addl %ebx, %edi +; X32-NEXT: addl %edx, %edi +; X32-NEXT: movl %edi, %ebx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %esi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: imull %ebp, %esi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %esi, %edx -; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl %eax, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: imull %eax, %ecx -; X32-NEXT: addl %edx, %ecx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %ebx, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %eax, %ecx +; X32-NEXT: imull %ecx, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ebx +; X32-NEXT: imull %edi, %esi +; X32-NEXT: addl %edx, %esi +; X32-NEXT: mull %edi +; X32-NEXT: addl %edx, %esi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ebx, %esi +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl %edi, %ebx +; X32-NEXT: mull %ebp +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ebp -; X32-NEXT: movl %eax, %esi -; X32-NEXT: addl %ebx, %esi -; X32-NEXT: adcl $0, %ebp ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %ebx +; X32-NEXT: mull %ebp +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %esi, %edi -; X32-NEXT: adcl %ebp, %ebx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: adcl $0, %ebp +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %edi, %ebx +; X32-NEXT: adcl %ebp, %ecx ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload -; X32-NEXT: adcl %esi, %edx +; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X32-NEXT: addl %ecx, %eax +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT: adcl 
{{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT: movl %edi, %ecx -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl %esi, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, %edi -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, %ebx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, %edi +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload @@ -5256,7 +5251,7 @@ ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NEXT: addl (%esp), %eax # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -5815,6 +5810,7 @@ ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl %edi, %esi ; X32-NEXT: mull %ebp ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: addl %ecx, %ebx @@ -5827,19 +5823,19 @@ ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: addl %ebp, %ecx ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ebp @@ -6109,36 +6105,36 @@ ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: addl %ebx, %edx ; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: addl %ebx, %esi ; X32-NEXT: addl %edx, %esi ; X32-NEXT: movl %esi, %ebx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 
4-byte Reload -; X32-NEXT: movl %eax, %esi +; X32-NEXT: movl %eax, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: imull %esi, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: imull %edi, %esi ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: imull %ecx, %edi +; X32-NEXT: addl %edx, %edi ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl %esi, %edx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: imull %ecx, %esi -; X32-NEXT: addl %edx, %esi +; X32-NEXT: addl %edx, %edi ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NEXT: adcl %ebx, %esi -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ebx, %edi +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %ecx +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi ; X32-NEXT: addl %ebx, %edi ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx @@ -6162,50 +6158,49 @@ ; X32-NEXT: movl 120(%edi), %esi ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: addl %ecx, %edx -; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: imull %ebp, %esi +; X32-NEXT: addl %ecx, %esi ; X32-NEXT: addl %edx, %esi -; X32-NEXT: movl 112(%edi), %ebp -; X32-NEXT: movl 116(%edi), %ebx +; X32-NEXT: movl 112(%edi), %ecx +; X32-NEXT: movl 116(%edi), %edi ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %edi -; X32-NEXT: imull %ebx, %edi -; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: mull %ebp -; X32-NEXT: addl %edi, %edx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: imull %ebp, %ecx -; X32-NEXT: addl %edx, %ecx +; X32-NEXT: movl %eax, %edx +; X32-NEXT: imull %edi, %edx +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: imull %ecx, %ebx +; X32-NEXT: addl %edx, %ebx +; X32-NEXT: mull %ecx +; X32-NEXT: addl %edx, %ebx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %esi +; X32-NEXT: adcl %esi, %ebx +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %ecx +; X32-NEXT: movl %edi, %eax +; X32-NEXT: mull %esi ; X32-NEXT: movl 
%edx, %edi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %esi, %ebx +; X32-NEXT: movl %eax, %esi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl %ebx, %ebp +; X32-NEXT: addl %esi, %ebp ; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb %bl +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl %bl, %ecx +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: adcl %ebx, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload @@ -6275,46 +6270,45 @@ ; X32-NEXT: adcl %ebx, %edi ; X32-NEXT: setb %bl ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl %ecx, %ebp ; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movzbl %bl, %edi ; X32-NEXT: adcl %edi, %edx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT: addl %eax, %edi -; X32-NEXT: adcl %edx, %esi -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: addl %eax, %esi +; X32-NEXT: adcl %edx, %ecx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi ; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: mull %ebx +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 
4-byte Reload +; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, %edi ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: mull %ebx -; X32-NEXT: movl %ebx, %ebp +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %ebp ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx @@ -6335,30 +6329,31 @@ ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %edi, %ecx +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ebp, %edi ; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: mull %ebp +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movl %eax, %edi +; X32-NEXT: adcl %esi, %ecx +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl %ebp, %edi ; X32-NEXT: mull %ebp -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl %ecx, %ebp -; X32-NEXT: adcl %esi, %ebx -; X32-NEXT: setb %cl -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: addl %ecx, %eax +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload @@ -6368,167 +6363,163 @@ ; X32-NEXT: adcl %edx, %ecx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %esi ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: imull %eax, %edi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: imull %eax, %ebp +; X32-NEXT: movl %eax, %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: addl %edi, %edx 
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: imull %ebp, %esi -; X32-NEXT: addl %edx, %esi -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: addl %ebp, %ecx +; X32-NEXT: addl %edx, %ecx +; X32-NEXT: movl %ecx, %ebp ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %edi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: imull %ebx, %edi +; X32-NEXT: movl %eax, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %edi, %edx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: imull %esi, %edi -; X32-NEXT: addl %edx, %edi -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: imull %esi, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %esi +; X32-NEXT: imull %ebx, %ecx +; X32-NEXT: addl %edx, %ecx +; X32-NEXT: mull %ebx +; X32-NEXT: addl %edx, %ecx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ebp, %ecx ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %ecx +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %esi, %ebx +; X32-NEXT: addl %ebp, %ebx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: mull %ebp -; X32-NEXT: movl %ebp, %esi -; X32-NEXT: movl %edx, %ebp +; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %edi, %ebp -; X32-NEXT: setb %cl +; X32-NEXT: adcl %edi, %esi +; X32-NEXT: setb %bl ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: addl %ebp, %eax -; X32-NEXT: movzbl %cl, %ecx -; X32-NEXT: adcl %ecx, %edx +; X32-NEXT: mull %ebp +; X32-NEXT: addl %esi, %eax +; X32-NEXT: movzbl %bl, %esi +; X32-NEXT: adcl %esi, %edx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: imull %eax, %edi +; X32-NEXT: movl %eax, %esi ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl %edi, %edx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: imull %ebx, %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; 
X32-NEXT: imull %ebp, %ecx +; X32-NEXT: addl %edi, %ecx ; X32-NEXT: addl %edx, %ecx ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: imull %esi, %ecx +; X32-NEXT: movl %eax, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: imull %ecx, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: imull %edi, %ebx +; X32-NEXT: addl %edx, %ebx ; X32-NEXT: mull %edi -; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: imull %edi, %ecx -; X32-NEXT: addl %edx, %ecx -; X32-NEXT: addl %ebp, %eax +; X32-NEXT: addl %edx, %ebx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl %edi, %ebp -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %ecx +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl %edi, %ecx ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: mull %ebx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebp ; X32-NEXT: addl %ecx, %ebp ; X32-NEXT: adcl %esi, %edi ; X32-NEXT: setb %cl ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %ebx +; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edx ; X32-NEXT: addl %edi, %edx ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %esi ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: adcl %ebx, %esi ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, %edi ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte 
Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, %ebp +; X32-NEXT: movl %edi, %ebp ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload @@ -6536,49 +6527,17 @@ ; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 
4-byte Folded Reload -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -6591,15 +6550,48 @@ ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl (%esp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, (%ecx) @@ -6633,32 +6625,33 @@ ; 
X32-NEXT: movl %esi, 56(%ecx) ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, 60(%ecx) +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl %eax, 64(%ecx) +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl %eax, 68(%ecx) +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl %eax, 72(%ecx) +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl %eax, 76(%ecx) +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl %eax, 80(%ecx) +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl %eax, 84(%ecx) ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %esi, 64(%ecx) -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %esi, 68(%ecx) -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %esi, 72(%ecx) -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %esi, 76(%ecx) -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %esi, 80(%ecx) -; X32-NEXT: movl %ebp, 84(%ecx) -; X32-NEXT: movl %edi, 88(%ecx) -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %esi, 92(%ecx) -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %esi, 96(%ecx) -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %esi, 100(%ecx) -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %esi, 104(%ecx) -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %esi, 108(%ecx) -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %esi, 112(%ecx) -; X32-NEXT: movl %ebx, 116(%ecx) +; X32-NEXT: movl %esi, 88(%ecx) +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl %eax, 92(%ecx) +; X32-NEXT: movl %ebp, 96(%ecx) +; X32-NEXT: movl %ebx, 100(%ecx) +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl %eax, 104(%ecx) +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl %eax, 108(%ecx) +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl %eax, 112(%ecx) +; X32-NEXT: movl %edi, 116(%ecx) ; X32-NEXT: movl %edx, 120(%ecx) +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: movl %eax, 124(%ecx) ; X32-NEXT: addl $1000, %esp # imm = 0x3E8 ; X32-NEXT: popl %esi @@ -6677,45 +6670,44 @@ ; X64-NEXT: pushq %rbx ; X64-NEXT: subq $352, %rsp # imm = 0x160 ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq 48(%rdi), %r9 -; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq 48(%rdi), %r10 +; X64-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq 40(%rdi), %rbp ; X64-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq 32(%rdi), %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rdi, %r10 ; X64-NEXT: xorl %r8d, %r8d ; X64-NEXT: mulq %r8 -; X64-NEXT: movq %rdx, %rdi -; X64-NEXT: movq %rax, %rcx +; X64-NEXT: movq %rdx, %r9 +; X64-NEXT: movq %rax, %r12 ; X64-NEXT: movq %rbp, %rax ; X64-NEXT: mulq %r8 ; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %rdi, %rbx +; X64-NEXT: addq %r9, %rbx ; 
X64-NEXT: movq %rdx, %rbp ; X64-NEXT: adcq $0, %rbp -; X64-NEXT: addq %rcx, %rbx +; X64-NEXT: addq %r12, %rbx ; X64-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rcx, %r11 -; X64-NEXT: adcq %rdi, %rbp +; X64-NEXT: adcq %r9, %rbp ; X64-NEXT: setb %bl ; X64-NEXT: movzbl %bl, %ebx ; X64-NEXT: addq %rax, %rbp ; X64-NEXT: adcq %rdx, %rbx -; X64-NEXT: movq %r9, %rax +; X64-NEXT: movq %r10, %rax ; X64-NEXT: mulq %r8 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: addq %rax, %rcx -; X64-NEXT: movq %rdi, %r14 -; X64-NEXT: movq %rdi, %r8 -; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %rdx, %r14 -; X64-NEXT: addq %rbp, %rcx -; X64-NEXT: movq %rcx, %r12 -; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %rbx, %r14 -; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %r12, %r15 +; X64-NEXT: addq %rax, %r15 +; X64-NEXT: movq %r9, %r13 +; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq %rdx, %r13 +; X64-NEXT: addq %rbp, %r15 +; X64-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq %rbx, %r13 +; X64-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %rsi, %r8 ; X64-NEXT: movq (%rsi), %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: xorl %ebp, %ebp @@ -6725,12 +6717,12 @@ ; X64-NEXT: movq 8(%rsi), %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: mulq %rbp -; X64-NEXT: xorl %r9d, %r9d -; X64-NEXT: movq %rax, %r15 -; X64-NEXT: addq %rcx, %r15 +; X64-NEXT: xorl %r11d, %r11d +; X64-NEXT: movq %rax, %r14 +; X64-NEXT: addq %rcx, %r14 ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: adcq $0, %rbp -; X64-NEXT: addq %rdi, %r15 +; X64-NEXT: addq %rdi, %r14 ; X64-NEXT: adcq %rcx, %rbp ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: setb %bl @@ -6738,607 +6730,604 @@ ; X64-NEXT: movzbl %bl, %ebx ; X64-NEXT: adcq %rdx, %rbx ; X64-NEXT: movq 16(%rsi), %rax -; X64-NEXT: movq %rsi, %r13 ; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: mulq %r9 +; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rdi, %r9 -; X64-NEXT: addq %rax, %r9 +; X64-NEXT: movq %rdi, %r11 +; X64-NEXT: addq %rax, %r11 ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: adcq %rdx, %rax -; X64-NEXT: addq %rbp, %r9 +; X64-NEXT: addq %rbp, %r11 ; X64-NEXT: adcq %rbx, %rax ; X64-NEXT: movq %rax, %rbp -; X64-NEXT: movq %r11, %rax -; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %r12, %rax +; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: addq %rdi, %rax -; X64-NEXT: adcq %rcx, %r8 -; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq (%r10), %rax +; X64-NEXT: adcq %rcx, %r9 +; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload +; X64-NEXT: movq (%r9), %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: xorl %r8d, %r8d -; X64-NEXT: mulq %r8 +; X64-NEXT: xorl %r10d, %r10d +; X64-NEXT: mulq %r10 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rdi, %rax ; X64-NEXT: movq %rdx, %rax ; X64-NEXT: 
adcq %rcx, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq 32(%r13), %rax +; X64-NEXT: movq 32(%r8), %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: mulq %r8 -; X64-NEXT: xorl %ecx, %ecx -; X64-NEXT: movq %rax, %r13 +; X64-NEXT: mulq %r10 +; X64-NEXT: movq %rax, %r8 ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rbx, %rax -; X64-NEXT: movq %rbx, %r8 -; X64-NEXT: addq %r13, %rax +; X64-NEXT: movq %rbx, %rcx +; X64-NEXT: addq %r8, %rax ; X64-NEXT: movq %rsi, %rax ; X64-NEXT: adcq %rdx, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %r11, %rax -; X64-NEXT: addq %rdi, %rax -; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rdi, %r11 -; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: addq %rdi, %r12 +; X64-NEXT: movq %r12, (%rsp) # 8-byte Spill ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; X64-NEXT: adcq %r15, %rax +; X64-NEXT: adcq %r14, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %r12, %rax -; X64-NEXT: adcq %r9, %rax -; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %rbp, %r14 -; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rbp, %rdi -; X64-NEXT: movq 8(%r10), %rax +; X64-NEXT: movq %r14, %r10 +; X64-NEXT: adcq %r11, %r15 +; X64-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq %rbp, %r13 +; X64-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %rbp, %r14 +; X64-NEXT: movq 8(%r9), %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rax, %r12 -; X64-NEXT: addq %rsi, %r12 +; X64-NEXT: xorl %r12d, %r12d +; X64-NEXT: mulq %r12 +; X64-NEXT: movq %rax, %r15 +; X64-NEXT: addq %rsi, %r15 ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: adcq $0, %rbp -; X64-NEXT: addq %rbx, %r12 +; X64-NEXT: addq %rbx, %r15 ; X64-NEXT: adcq %rsi, %rbp ; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: setb %bl ; X64-NEXT: addq %rax, %rbp ; X64-NEXT: movzbl %bl, %ebx ; X64-NEXT: adcq %rdx, %rbx -; X64-NEXT: movq 16(%r10), %rax +; X64-NEXT: movq 16(%r9), %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: mulq %rcx +; X64-NEXT: mulq %r12 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %r8, %rcx -; X64-NEXT: addq %rax, %rcx -; X64-NEXT: movq %rsi, %r10 -; X64-NEXT: adcq %rdx, %r10 -; X64-NEXT: addq %rbp, %rcx -; X64-NEXT: adcq %rbx, %r10 -; X64-NEXT: movq %r8, %rdx -; X64-NEXT: movq %r8, %r14 -; X64-NEXT: movq %r8, (%rsp) # 8-byte Spill -; X64-NEXT: addq %r11, %rdx +; X64-NEXT: movq %rcx, %r9 +; X64-NEXT: addq %rax, %r9 +; X64-NEXT: adcq %rdx, %rsi +; X64-NEXT: addq %rbp, %r9 +; X64-NEXT: adcq %rbx, %rsi +; X64-NEXT: movq %rcx, %rdx +; X64-NEXT: movq %rcx, %r13 +; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: addq %rdi, %rdx ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %r12, %rsi -; X64-NEXT: adcq %r12, %r15 -; X64-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %rcx, %r9 -; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rcx, %r8 -; X64-NEXT: adcq %r10, %rdi +; 
X64-NEXT: movq %rdi, %r12 ; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload -; X64-NEXT: movq 40(%rdi), %rax +; X64-NEXT: adcq %r15, %r10 +; X64-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq %r9, %r11 +; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %r9, %r11 +; X64-NEXT: adcq %rsi, %r14 +; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %rsi, %r14 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; X64-NEXT: movq 40(%rsi), %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: xorl %r9d, %r9d -; X64-NEXT: mulq %r9 -; X64-NEXT: movq %rax, %rcx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload -; X64-NEXT: addq %r11, %rcx +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rax, %rdi +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload +; X64-NEXT: addq %r10, %rdi ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: adcq $0, %rbp -; X64-NEXT: addq %r13, %rcx -; X64-NEXT: adcq %r11, %rbp +; X64-NEXT: addq %r8, %rdi +; X64-NEXT: adcq %r10, %rbp ; X64-NEXT: setb %bl ; X64-NEXT: addq %rax, %rbp ; X64-NEXT: movzbl %bl, %ebx ; X64-NEXT: adcq %rdx, %rbx -; X64-NEXT: movq 48(%rdi), %rax +; X64-NEXT: movq 48(%rsi), %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: mulq %r9 +; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %r13, %r12 -; X64-NEXT: addq %rax, %r12 -; X64-NEXT: movq %r11, %rdi -; X64-NEXT: adcq %rdx, %rdi -; X64-NEXT: addq %rbp, %r12 -; X64-NEXT: adcq %rbx, %rdi +; X64-NEXT: movq %r8, %rsi +; X64-NEXT: addq %rax, %rsi +; X64-NEXT: movq %r10, %rcx +; X64-NEXT: adcq %rdx, %rcx +; X64-NEXT: addq %rbp, %rsi +; X64-NEXT: adcq %rbx, %rcx +; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: addq %r8, %r13 ; X64-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: addq %r13, %r14 +; X64-NEXT: adcq %rdi, %r15 +; X64-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq %rsi, %r11 +; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq %rcx, %r14 ; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %rcx, %rsi -; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %r12, %r8 -; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %rdi, %r10 -; X64-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload -; X64-NEXT: movq %r8, %rax -; X64-NEXT: addq %r13, %rax +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload +; X64-NEXT: movq %rdx, %rax +; X64-NEXT: addq %r8, %rax ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; X64-NEXT: adcq %r11, %rax +; X64-NEXT: adcq %r10, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %r8, %rax -; X64-NEXT: movq %r8, %r10 -; X64-NEXT: addq %r13, %rax +; X64-NEXT: movq %rdx, %rax +; X64-NEXT: addq %r8, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload -; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload -; X64-NEXT: movq 
%r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload ; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload -; X64-NEXT: mulq %r9 -; X64-NEXT: movq %rax, %r14 -; X64-NEXT: movq %rdx, %rsi +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload +; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; X64-NEXT: movq %rsi, %rax +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload +; X64-NEXT: mulq %rdi +; X64-NEXT: movq %rax, %r10 +; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; X64-NEXT: movq 56(%rax), %r11 -; X64-NEXT: movq %r11, %rax -; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: mulq %r9 +; X64-NEXT: movq 56(%rax), %r9 +; X64-NEXT: movq %r9, %rax +; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %rsi, %rbx +; X64-NEXT: addq %rcx, %rbx ; X64-NEXT: adcq $0, %rbp -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload -; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdx, %rsi +; X64-NEXT: movq %rsi, %rax +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; X64-NEXT: mulq %rsi +; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %r8 ; X64-NEXT: addq %rbx, %r8 -; X64-NEXT: adcq %rbp, %rsi -; X64-NEXT: setb %cl -; X64-NEXT: movq %r11, %rax -; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdi, %r13 -; X64-NEXT: addq %rsi, %rax -; X64-NEXT: movzbl %cl, %ecx +; X64-NEXT: adcq %rbp, %rcx +; X64-NEXT: setb %bl +; X64-NEXT: movq %r9, %rax +; X64-NEXT: mulq %rsi +; X64-NEXT: movq %rsi, %r11 +; X64-NEXT: addq %rcx, %rax +; X64-NEXT: movzbl %bl, %ecx ; X64-NEXT: adcq %rcx, %rdx +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload +; X64-NEXT: addq %r12, %r14 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload -; X64-NEXT: addq %rax, %r15 -; X64-NEXT: adcq %rdx, %r12 +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload +; X64-NEXT: addq %rax, %r14 +; X64-NEXT: adcq %rdx, %r15 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; X64-NEXT: movq %rsi, %rax +; X64-NEXT: mulq %rdi +; X64-NEXT: movq %rdx, %r9 +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq %r9 -; X64-NEXT: movq %rdx, %rsi -; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: mulq %r9 +; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %rsi, %rbx +; X64-NEXT: addq %r9, %rbx ; X64-NEXT: adcq $0, %rbp -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %rcx, %r11 -; X64-NEXT: mulq %r13 -; X64-NEXT: movq %rdx, %rcx +; X64-NEXT: 
movq %rsi, %rax +; X64-NEXT: movq %rsi, %r9 +; X64-NEXT: mulq %r11 +; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: addq %rbx, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %rbp, %rcx +; X64-NEXT: adcq %rbp, %rsi ; X64-NEXT: setb %bl -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: mulq %r13 -; X64-NEXT: movq %rdx, %r13 -; X64-NEXT: movq %rax, %rsi -; X64-NEXT: addq %rcx, %rsi +; X64-NEXT: movq %rcx, %rax +; X64-NEXT: movq %rcx, %rdi +; X64-NEXT: mulq %r11 +; X64-NEXT: movq %rdx, %r12 +; X64-NEXT: movq %rax, %rcx +; X64-NEXT: addq %rsi, %rcx ; X64-NEXT: movzbl %bl, %eax -; X64-NEXT: adcq %rax, %r13 -; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload -; X64-NEXT: addq %r14, %rsi -; X64-NEXT: adcq %r8, %r13 +; X64-NEXT: adcq %rax, %r12 +; X64-NEXT: addq (%rsp), %rcx # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload +; X64-NEXT: addq %r10, %rcx +; X64-NEXT: adcq %r8, %r12 +; X64-NEXT: adcq $0, %r14 ; X64-NEXT: adcq $0, %r15 -; X64-NEXT: adcq $0, %r12 -; X64-NEXT: movq %r11, %rbx -; X64-NEXT: movq %r11, %rax -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload -; X64-NEXT: mulq %rbp -; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %r14 +; X64-NEXT: movq %r9, %rax +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload +; X64-NEXT: mulq %r11 +; X64-NEXT: movq %rdx, %rsi +; X64-NEXT: movq %rax, %r10 ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq %rdi, %r11 -; X64-NEXT: mulq %rbp -; X64-NEXT: movq %rdx, %rdi -; X64-NEXT: movq %rax, %rbp -; X64-NEXT: addq %rcx, %rbp -; X64-NEXT: adcq $0, %rdi +; X64-NEXT: movq %rdi, %r8 +; X64-NEXT: mulq %r11 +; X64-NEXT: movq %rdx, %rbx +; X64-NEXT: movq %rax, %rdi +; X64-NEXT: addq %rsi, %rdi +; X64-NEXT: adcq $0, %rbx ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; X64-NEXT: movq 24(%rax), %r9 -; X64-NEXT: movq %rbx, %rax -; X64-NEXT: mulq %r9 -; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %r8 -; X64-NEXT: addq %rbp, %r8 -; X64-NEXT: adcq %rdi, %rcx +; X64-NEXT: movq 24(%rax), %r13 +; X64-NEXT: movq %r9, %rax +; X64-NEXT: mulq %r13 +; X64-NEXT: movq %rdx, %rsi +; X64-NEXT: addq %rdi, %rax +; X64-NEXT: movq %rax, %rdi +; X64-NEXT: adcq %rbx, %rsi ; X64-NEXT: setb %bl -; X64-NEXT: movq %r11, %rax -; X64-NEXT: mulq %r9 -; X64-NEXT: addq %rcx, %rax -; X64-NEXT: movzbl %bl, %ecx -; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq %r10, %rbp -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload -; X64-NEXT: addq %r11, %rbp +; X64-NEXT: movq %r8, %rax +; X64-NEXT: mulq %r13 +; X64-NEXT: addq %rsi, %rax +; X64-NEXT: movzbl %bl, %esi +; X64-NEXT: adcq %rsi, %rdx ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload -; X64-NEXT: adcq %r10, %rbx -; X64-NEXT: addq %rax, %rbp -; X64-NEXT: adcq %rdx, %rbx -; X64-NEXT: addq %rsi, %r14 -; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %r13, %r8 -; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq $0, %rbp +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload +; X64-NEXT: addq %r9, %rbx +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; X64-NEXT: adcq %r8, %rbp +; X64-NEXT: addq %rax, %rbx +; X64-NEXT: adcq %rdx, %rbp +; X64-NEXT: addq %rcx, %r10 +; X64-NEXT: movq %r10, 
{{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq %r12, %rdi +; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq $0, %rbx -; X64-NEXT: addq %r15, %rbp -; X64-NEXT: adcq %r12, %rbx -; X64-NEXT: setb %r15b +; X64-NEXT: adcq $0, %rbp +; X64-NEXT: addq %r14, %rbx +; X64-NEXT: adcq %r15, %rbp +; X64-NEXT: setb %r14b ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload -; X64-NEXT: mulq %rsi -; X64-NEXT: movq %rdx, %r14 -; X64-NEXT: movq %rax, %r13 -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload -; X64-NEXT: movq %r12, %rax +; X64-NEXT: movq %r11, %rsi +; X64-NEXT: mulq %r11 +; X64-NEXT: movq %rdx, %r10 +; X64-NEXT: movq %rax, %r15 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload +; X64-NEXT: movq %r11, %rax ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rdi -; X64-NEXT: addq %r14, %rdi +; X64-NEXT: addq %r10, %rdi ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq %r9 +; X64-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: mulq %r13 ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %r14 -; X64-NEXT: addq %rdi, %r14 +; X64-NEXT: movq %rax, %r10 +; X64-NEXT: addq %rdi, %r10 ; X64-NEXT: adcq %rsi, %rcx ; X64-NEXT: setb %sil -; X64-NEXT: movq %r12, %rax -; X64-NEXT: mulq %r9 -; X64-NEXT: movq %r9, %r12 +; X64-NEXT: movq %r11, %rax +; X64-NEXT: mulq %r13 ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %sil, %ecx ; X64-NEXT: adcq %rcx, %rdx ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; X64-NEXT: addq %r11, %rcx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload -; X64-NEXT: adcq %r10, %r9 +; X64-NEXT: addq %r9, %rcx +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload +; X64-NEXT: adcq %r8, %r13 ; X64-NEXT: addq %rax, %rcx -; X64-NEXT: adcq %rdx, %r9 -; X64-NEXT: addq %rbp, %r13 -; X64-NEXT: adcq %rbx, %r14 -; X64-NEXT: movzbl %r15b, %eax +; X64-NEXT: adcq %rdx, %r13 +; X64-NEXT: addq %rbx, %r15 +; X64-NEXT: adcq %rbp, %r10 +; X64-NEXT: movzbl %r14b, %eax ; X64-NEXT: adcq %rax, %rcx -; X64-NEXT: adcq $0, %r9 -; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload -; X64-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload -; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq $0, %r13 +; X64-NEXT: addq (%rsp), %r15 # 8-byte Folded Reload +; X64-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload +; X64-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload -; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload -; X64-NEXT: mulq %r9 -; X64-NEXT: movq %rax, %r14 -; X64-NEXT: movq %rdx, %rbx +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload +; X64-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; X64-NEXT: movq %rsi, %rax +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), 
%r13 # 8-byte Reload +; X64-NEXT: mulq %r13 +; X64-NEXT: movq %rax, %r10 +; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; X64-NEXT: movq 24(%rax), %rcx -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: mulq %r9 -; X64-NEXT: movq %rdx, %rsi -; X64-NEXT: movq %rax, %rbp -; X64-NEXT: addq %rbx, %rbp -; X64-NEXT: adcq $0, %rsi -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload -; X64-NEXT: mulq %r11 -; X64-NEXT: movq %rdx, %rbx -; X64-NEXT: movq %rax, %r15 -; X64-NEXT: addq %rbp, %r15 -; X64-NEXT: adcq %rsi, %rbx -; X64-NEXT: setb %sil -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq %r11 -; X64-NEXT: addq %rbx, %rax -; X64-NEXT: movzbl %sil, %ecx +; X64-NEXT: movq 24(%rax), %rbp +; X64-NEXT: movq %rbp, %rax +; X64-NEXT: movq %rbp, (%rsp) # 8-byte Spill +; X64-NEXT: mulq %r13 +; X64-NEXT: movq %rdx, %rdi +; X64-NEXT: movq %rax, %rbx +; X64-NEXT: addq %rcx, %rbx +; X64-NEXT: adcq $0, %rdi +; X64-NEXT: movq %rsi, %rax +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; X64-NEXT: mulq %rsi +; X64-NEXT: movq %rdx, %rcx +; X64-NEXT: movq %rax, %r12 +; X64-NEXT: addq %rbx, %r12 +; X64-NEXT: adcq %rdi, %rcx +; X64-NEXT: setb %bl +; X64-NEXT: movq %rbp, %rax +; X64-NEXT: mulq %rsi +; X64-NEXT: movq %rsi, %r9 +; X64-NEXT: addq %rcx, %rax +; X64-NEXT: movzbl %bl, %ecx ; X64-NEXT: adcq %rcx, %rdx ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload ; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload ; X64-NEXT: addq %rax, %r8 -; X64-NEXT: adcq %rdx, %r10 -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq %r9 -; X64-NEXT: movq %rdx, %rdi -; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq %rdx, %r11 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload ; X64-NEXT: movq %rsi, %rax -; X64-NEXT: mulq %r9 -; X64-NEXT: movq %rdx, %rbp -; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %rdi, %rbx -; X64-NEXT: adcq $0, %rbp -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %rcx, %r9 -; X64-NEXT: mulq %r11 -; X64-NEXT: movq %rdx, %rdi -; X64-NEXT: addq %rbx, %rax +; X64-NEXT: mulq %r13 +; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %rbp, %rdi -; X64-NEXT: setb %cl +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload +; X64-NEXT: movq %rbp, %rax +; X64-NEXT: mulq %r13 +; X64-NEXT: movq %rdx, %rdi +; X64-NEXT: movq %rax, %rbx +; X64-NEXT: addq %rcx, %rbx +; X64-NEXT: adcq $0, %rdi ; X64-NEXT: movq %rsi, %rax -; X64-NEXT: movq %rsi, %rbp -; X64-NEXT: mulq %r11 +; X64-NEXT: movq %rsi, %r15 +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rsi -; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %rdi, %rbx -; X64-NEXT: movzbl %cl, %eax -; X64-NEXT: adcq %rax, %rsi -; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload -; X64-NEXT: addq %r14, %rbx -; X64-NEXT: adcq %r15, %rsi +; X64-NEXT: addq %rbx, %rax +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq %rdi, %rsi +; X64-NEXT: setb %bl +; 
X64-NEXT: movq %rbp, %rax
+; X64-NEXT: movq %rbp, %r13
+; X64-NEXT: mulq %r9
+; X64-NEXT: movq %rdx, %r14
+; X64-NEXT: movq %rax, %rcx
+; X64-NEXT: addq %rsi, %rcx
+; X64-NEXT: movzbl %bl, %eax
+; X64-NEXT: adcq %rax, %r14
+; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
+; X64-NEXT: addq %r10, %rcx
+; X64-NEXT: adcq %r12, %r14
 ; X64-NEXT: adcq $0, %r8
-; X64-NEXT: adcq $0, %r10
-; X64-NEXT: movq %r9, %rax
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; X64-NEXT: mulq %rdi
-; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: movq %rax, %r11
+; X64-NEXT: adcq $0, %r11
+; X64-NEXT: movq %r15, %rbx
+; X64-NEXT: movq %r15, %rax
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; X64-NEXT: mulq %r15
+; X64-NEXT: movq %rdx, %rsi
+; X64-NEXT: movq %rax, %r10
 ; X64-NEXT: movq %rbp, %rax
-; X64-NEXT: movq %rbp, %r14
-; X64-NEXT: mulq %rdi
-; X64-NEXT: movq %rdi, %r15
+; X64-NEXT: mulq %r15
 ; X64-NEXT: movq %rdx, %rdi
 ; X64-NEXT: movq %rax, %rbp
-; X64-NEXT: addq %rcx, %rbp
+; X64-NEXT: addq %rsi, %rbp
 ; X64-NEXT: adcq $0, %rdi
-; X64-NEXT: movq %r9, %rax
-; X64-NEXT: mulq %r12
-; X64-NEXT: movq %rdx, %rcx
+; X64-NEXT: movq %rbx, %rax
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT: mulq %rbx
+; X64-NEXT: movq %rdx, %rsi
 ; X64-NEXT: addq %rbp, %rax
-; X64-NEXT: movq %rax, %r9
-; X64-NEXT: adcq %rdi, %rcx
+; X64-NEXT: movq %rax, %r12
+; X64-NEXT: adcq %rdi, %rsi
 ; X64-NEXT: setb %dil
-; X64-NEXT: movq %r14, %rax
-; X64-NEXT: mulq %r12
-; X64-NEXT: addq %rcx, %rax
-; X64-NEXT: movzbl %dil, %ecx
-; X64-NEXT: adcq %rcx, %rdx
-; X64-NEXT: movq (%rsp), %rdi # 8-byte Reload
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
-; X64-NEXT: addq %r13, %rdi
+; X64-NEXT: movq %r13, %rax
+; X64-NEXT: mulq %rbx
+; X64-NEXT: addq %rsi, %rax
+; X64-NEXT: movzbl %dil, %esi
+; X64-NEXT: adcq %rsi, %rdx
 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
-; X64-NEXT: adcq %r14, %rbp
-; X64-NEXT: addq %rax, %rdi
-; X64-NEXT: adcq %rdx, %rbp
-; X64-NEXT: addq %rbx, %r11
-; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: adcq %rsi, %r9
-; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: adcq $0, %rdi
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT: addq %r9, %rbp
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; X64-NEXT: adcq %r13, %rbx
+; X64-NEXT: addq %rax, %rbp
+; X64-NEXT: adcq %rdx, %rbx
+; X64-NEXT: addq %rcx, %r10
+; X64-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: adcq %r14, %r12
+; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT: adcq $0, %rbp
-; X64-NEXT: addq %r8, %rdi
-; X64-NEXT: adcq %r10, %rbp
+; X64-NEXT: adcq $0, %rbx
+; X64-NEXT: addq %r8, %rbp
+; X64-NEXT: adcq %r11, %rbx
 ; X64-NEXT: setb %r10b
 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
 ; X64-NEXT: movq %rcx, %rax
 ; X64-NEXT: mulq %r15
-; X64-NEXT: movq %rdx, %r8
-; X64-NEXT: movq %rax, %r11
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
-; X64-NEXT: movq %r9, %rax
+; X64-NEXT: movq %rdx, %r11
+; X64-NEXT: movq %rax, %r12
+; X64-NEXT: movq (%rsp), %r8 # 8-byte Reload
+; X64-NEXT: movq %r8, %rax
 ; X64-NEXT: mulq %r15
 ; X64-NEXT: movq %rdx, %rsi
-; X64-NEXT: movq %rax, %rbx
-; X64-NEXT: addq %r8, %rbx
+; X64-NEXT: movq %rax, %rdi
+; X64-NEXT: addq %r11, %rdi
 ; X64-NEXT: adcq $0, %rsi
 ; X64-NEXT: movq %rcx, %rax
-; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: mulq %r12
-; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: movq %rax, %r8
-; X64-NEXT: addq %rbx, %r8
-; X64-NEXT: adcq %rsi, %rcx
-; X64-NEXT: setb %bl
-; X64-NEXT: movq %r9, %rax
-; X64-NEXT: mulq %r12
-; X64-NEXT: addq %rcx, %rax
-; X64-NEXT: movzbl %bl, %ecx
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT: mulq %rcx
+; X64-NEXT: movq %rdx, %r11
+; X64-NEXT: movq %rax, %r15
+; X64-NEXT: addq %rdi, %r15
+; X64-NEXT: adcq %rsi, %r11
+; X64-NEXT: setb %sil
+; X64-NEXT: movq %r8, %rax
+; X64-NEXT: mulq %rcx
+; X64-NEXT: addq %r11, %rax
+; X64-NEXT: movzbl %sil, %ecx
 ; X64-NEXT: adcq %rcx, %rdx
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; X64-NEXT: movq %r11, %r14
+; X64-NEXT: addq %r9, %r14
 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
-; X64-NEXT: movq %r9, %r15
-; X64-NEXT: addq %r13, %r15
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
-; X64-NEXT: movq %r12, %r13
-; X64-NEXT: adcq %r14, %r13
-; X64-NEXT: addq %rax, %r15
-; X64-NEXT: adcq %rdx, %r13
-; X64-NEXT: addq %rdi, %r11
-; X64-NEXT: adcq %rbp, %r8
+; X64-NEXT: movq %r9, %rcx
+; X64-NEXT: adcq %r13, %rcx
+; X64-NEXT: addq %rax, %r14
+; X64-NEXT: adcq %rdx, %rcx
+; X64-NEXT: addq %rbp, %r12
+; X64-NEXT: adcq %rbx, %r15
 ; X64-NEXT: movzbl %r10b, %eax
-; X64-NEXT: adcq %rax, %r15
-; X64-NEXT: adcq $0, %r13
-; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload
-; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
+; X64-NEXT: adcq %rax, %r14
+; X64-NEXT: adcq $0, %rcx
+; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload
 ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
-; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
-; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload
-; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
-; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
+; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload
+; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
-; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
+; X64-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
+; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT: adcq $0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
 ; X64-NEXT: adcq $0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
 ; X64-NEXT: adcq $0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
 ; X64-NEXT: adcq $0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT: movq %rsi, %rax
 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; X64-NEXT: mulq %rcx
-; X64-NEXT: movq %rdx, %rsi
-; X64-NEXT: movq %rax, %r14
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
-; X64-NEXT: movq %r10, %rax
-; X64-NEXT: mulq %rcx
-; X64-NEXT: movq %rcx, %rbp
-; X64-NEXT: movq %rdx, %rbx
-; X64-NEXT: movq %rax, %rcx
-; X64-NEXT: addq %rsi, %rcx
-; X64-NEXT: adcq $0, %rbx
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
-; X64-NEXT: mulq %r11
-; X64-NEXT: movq %rdx, %rsi
+; X64-NEXT: mulq %rdi
+; X64-NEXT: movq %rdx, %rcx
+; X64-NEXT: movq %rax, %r13
+; X64-NEXT: movq %r8, %r10
+; X64-NEXT: movq %r8, %rax
+; X64-NEXT: mulq %rdi
+; X64-NEXT: movq %rdx, %rbp
+; X64-NEXT: movq %rax, %rbx
+; X64-NEXT: addq %rcx, %rbx
+; X64-NEXT: adcq $0, %rbp
+; X64-NEXT: movq %rsi, %rax
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT: mulq %rsi
+; X64-NEXT: movq %rdx, %rcx
 ; X64-NEXT: movq %rax, %r8
-; X64-NEXT: addq %rcx, %r8
-; X64-NEXT: adcq %rbx, %rsi
-; X64-NEXT: setb %cl
+; X64-NEXT: addq %rbx, %r8
+; X64-NEXT: adcq %rbp, %rcx
+; X64-NEXT: setb %bl
 ; X64-NEXT: movq %r10, %rax
-; X64-NEXT: mulq %r11
-; X64-NEXT: addq %rsi, %rax
-; X64-NEXT: movzbl %cl, %ecx
+; X64-NEXT: mulq %rsi
+; X64-NEXT: movq %rsi, %r15
+; X64-NEXT: addq %rcx, %rax
+; X64-NEXT: movzbl %bl, %ecx
 ; X64-NEXT: adcq %rcx, %rdx
-; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload
-; X64-NEXT: movq %r12, %r10
-; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload
-; X64-NEXT: addq %rax, %r9
-; X64-NEXT: adcq %rdx, %r10
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; X64-NEXT: movq %rcx, %rax
-; X64-NEXT: mulq %rbp
-; X64-NEXT: movq %rdx, %r12
+; X64-NEXT: movq %r11, %r10
+; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload
+; X64-NEXT: movq %r9, %r12
+; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload
+; X64-NEXT: addq %rax, %r10
+; X64-NEXT: adcq %rdx, %r12
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; X64-NEXT: movq %r11, %rax
+; X64-NEXT: movq %rdi, %rsi
+; X64-NEXT: mulq %rdi
+; X64-NEXT: movq %rdx, %r9
 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
 ; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: mulq %rbp
-; X64-NEXT: movq %rdx, %rsi
+; X64-NEXT: mulq %rsi
+; X64-NEXT: movq %rdx, %rbp
 ; X64-NEXT: movq %rax, %rbx
-; X64-NEXT: addq %r12, %rbx
-; X64-NEXT: adcq $0, %rsi
-; X64-NEXT: movq %rcx, %rax
-; X64-NEXT: movq %rcx, %r12
-; X64-NEXT: mulq %r11
-; X64-NEXT: movq %rdx, %rcx
+; X64-NEXT: addq %r9, %rbx
+; X64-NEXT: adcq $0, %rbp
+; X64-NEXT: movq %r11, %rax
+; X64-NEXT: mulq %r15
+; X64-NEXT: movq %rdx, %rsi
 ; X64-NEXT: addq %rbx, %rax
 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: adcq %rsi, %rcx
-; X64-NEXT: setb %sil
+; X64-NEXT: adcq %rbp, %rsi
+; X64-NEXT: setb %cl
 ; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: movq %rdi, %rbp
-; X64-NEXT: mulq %r11
-; X64-NEXT: movq %rdx, %rdi
+; X64-NEXT: mulq %r15
+; X64-NEXT: movq %rdx, %r9
 ; X64-NEXT: movq %rax, %rbx
-; X64-NEXT: addq %rcx, %rbx
-; X64-NEXT: movzbl %sil, %eax
-; X64-NEXT: adcq %rax, %rdi
+; X64-NEXT: addq %rsi, %rbx
+; X64-NEXT: movzbl %cl, %eax
+; X64-NEXT: adcq %rax, %r9
 ; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
-; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload
-; X64-NEXT: addq %r14, %rbx
-; X64-NEXT: adcq %r8, %rdi
-; X64-NEXT: adcq $0, %r9
+; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload
+; X64-NEXT: addq %r13, %rbx
+; X64-NEXT: adcq %r8, %r9
 ; X64-NEXT: adcq $0, %r10
-; X64-NEXT: movq %r12, %r11
-; X64-NEXT: movq %r12, %rax
+; X64-NEXT: adcq $0, %r12
+; X64-NEXT: movq %r11, %rax
 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
 ; X64-NEXT: mulq %rcx
-; X64-NEXT: movq %rdx, %r14
-; X64-NEXT: movq %rax, %r12
-; X64-NEXT: movq %rbp, %rax
-; X64-NEXT: movq %rbp, %r8
+; X64-NEXT: movq %rdx, %rsi
+; X64-NEXT: movq %rax, %r15
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: movq %rdi, %r8
 ; X64-NEXT: mulq %rcx
-; X64-NEXT: movq %rdx, %rbp
-; X64-NEXT: movq %rax, %rcx
-; X64-NEXT: addq %r14, %rcx
-; X64-NEXT: adcq $0, %rbp
+; X64-NEXT: movq %rdx, %rdi
+; X64-NEXT: movq %rax, %rbp
+; X64-NEXT: addq %rsi, %rbp
+; X64-NEXT: adcq $0, %rdi
 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; X64-NEXT: movq 56(%rax), %rsi
+; X64-NEXT: movq 56(%rax), %rcx
 ; X64-NEXT: movq %r11, %rax
-; X64-NEXT: mulq %rsi
-; X64-NEXT: movq %rsi, %r11
-; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: mulq %rcx
+; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT: movq %rdx, %rsi
-; X64-NEXT: movq %rax, %r14
-; X64-NEXT: addq %rcx, %r14
-; X64-NEXT: adcq %rbp, %rsi
-; X64-NEXT: setb %cl
+; X64-NEXT: addq %rbp, %rax
+; X64-NEXT: movq %rax, %rbp
+; X64-NEXT: adcq %rdi, %rsi
+; X64-NEXT: setb %dil
 ; X64-NEXT: movq %r8, %rax
-; X64-NEXT: mulq %r11
+; X64-NEXT: mulq %rcx
 ; X64-NEXT: addq %rsi, %rax
-; X64-NEXT: movzbl %cl, %ecx
+; X64-NEXT: movzbl %dil, %ecx
 ; X64-NEXT: adcq %rcx, %rdx
-; X64-NEXT: movq (%rsp), %rcx # 8-byte Reload
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
-; X64-NEXT: addq %r8, %rcx
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT: addq %r8, %rdi
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
-; X64-NEXT: adcq %r11, %rsi
-; X64-NEXT: addq %rax, %rcx
-; X64-NEXT: adcq %rdx, %rsi
-; X64-NEXT: addq %rbx, %r12
-; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: adcq %rdi, %r14
+; X64-NEXT: adcq %r11, %rcx
+; X64-NEXT: addq %rax, %rdi
+; X64-NEXT: adcq %rdx, %rcx
+; X64-NEXT: addq %rbx, %r15
+; X64-NEXT: movq %r15, %r13
+; X64-NEXT: adcq %r9, %rbp
+; X64-NEXT: movq %rbp, %r15
+; X64-NEXT: adcq $0, %rdi
 ; X64-NEXT: adcq $0, %rcx
-; X64-NEXT: adcq $0, %rsi
-; X64-NEXT: addq %r9, %rcx
-; X64-NEXT: adcq %r10, %rsi
+; X64-NEXT: addq %r10, %rdi
+; X64-NEXT: adcq %r12, %rcx
 ; X64-NEXT: setb {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Spill
 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
 ; X64-NEXT: movq %rbp, %rax
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; X64-NEXT: mulq %rdi
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT: mulq %rsi
 ; X64-NEXT: movq %rdx, %r9
 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; X64-NEXT: movq (%rsp), %r10 # 8-byte Reload
 ; X64-NEXT: movq %r10, %rax
-; X64-NEXT: mulq %rdi
+; X64-NEXT: mulq %rsi
 ; X64-NEXT: movq %rdx, %r12
 ; X64-NEXT: movq %rax, %rbx
 ; X64-NEXT: addq %r9, %rbx
 ; X64-NEXT: adcq $0, %r12
 ; X64-NEXT: movq %rbp, %rax
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; X64-NEXT: mulq %rdi
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT: mulq %rsi
 ; X64-NEXT: movq %rdx, %r9
 ; X64-NEXT: movq %rax, %rbp
 ; X64-NEXT: addq %rbx, %rbp
 ; X64-NEXT: adcq %r12, %r9
 ; X64-NEXT: setb %bl
 ; X64-NEXT: movq %r10, %rax
-; X64-NEXT: mulq %rdi
+; X64-NEXT: mulq %rsi
 ; X64-NEXT: addq %r9, %rax
-; X64-NEXT: movzbl %bl, %edi
-; X64-NEXT: adcq %rdi, %rdx
+; X64-NEXT: movzbl %bl, %esi
+; X64-NEXT: adcq %rsi, %rdx
 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
 ; X64-NEXT: addq %r8, %r12
 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
@@ -7346,8 +7335,8 @@
 ; X64-NEXT: addq %rax, %r12
 ; X64-NEXT: adcq %rdx, %r10
 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
-; X64-NEXT: addq %rcx, %rdx
-; X64-NEXT: adcq %rsi, %rbp
+; X64-NEXT: addq %rdi, %rdx
+; X64-NEXT: adcq %rcx, %rbp
 ; X64-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
 ; X64-NEXT: adcq %rax, %r12
 ; X64-NEXT: adcq $0, %r10
@@ -7359,9 +7348,10 @@
 ; X64-NEXT: addq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
 ; X64-NEXT: adcq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
-; X64-NEXT: adcq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
-; X64-NEXT: adcq %r13, %r14
-; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: adcq %r14, %r13
+; X64-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
+; X64-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT: adcq $0, %rdx
 ; X64-NEXT: adcq $0, %rbp
 ; X64-NEXT: adcq $0, %r12
@@ -7372,112 +7362,115 @@
 ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload
 ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload
 ; X64-NEXT: setb {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Spill
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; X64-NEXT: movq %rcx, %rax
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
-; X64-NEXT: mulq %r14
-; X64-NEXT: movq %rdx, %r11
-; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
-; X64-NEXT: movq %r15, %rax
-; X64-NEXT: mulq %r14
-; X64-NEXT: movq %rdx, %rdi
-; X64-NEXT: movq %rax, %rbx
-; X64-NEXT: addq %r11, %rbx
-; X64-NEXT: adcq $0, %rdi
-; X64-NEXT: movq %rcx, %rax
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT: movq %rdi, %rax
 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
 ; X64-NEXT: mulq %rsi
 ; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: movq %rax, %r9
-; X64-NEXT: addq %rbx, %r9
-; X64-NEXT: adcq %rdi, %rcx
-; X64-NEXT: setb %bl
-; X64-NEXT: movq %r15, %rax
+; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT: movq %r9, %rax
 ; X64-NEXT: mulq %rsi
 ; X64-NEXT: movq %rsi, %r13
-; X64-NEXT: addq %rcx, %rax
+; X64-NEXT: movq %rdx, %rsi
+; X64-NEXT: movq %rax, %rbx
+; X64-NEXT: addq %rcx, %rbx
+; X64-NEXT: adcq $0, %rsi
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT: mulq %rcx
+; X64-NEXT: movq %rdx, %rdi
+; X64-NEXT: movq %rax, %r15
+; X64-NEXT: addq %rbx, %r15
+; X64-NEXT: adcq %rsi, %rdi
+; X64-NEXT: setb %bl
+; X64-NEXT: movq %r9, %rax
+; X64-NEXT: mulq %rcx
+; X64-NEXT: movq %rcx, %r11
+; X64-NEXT: addq %rdi, %rax
 ; X64-NEXT: movzbl %bl, %ecx
 ; X64-NEXT: adcq %rcx, %rdx
 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
 ; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
-; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
 ; X64-NEXT: addq %rax, %r8
-; X64-NEXT: adcq %rdx, %r15
+; X64-NEXT: adcq %rdx, %r14
 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
 ; X64-NEXT: movq %rcx, %rax
-; X64-NEXT: mulq %r14
-; X64-NEXT: movq %rdx, %r11
+; X64-NEXT: mulq %r13
+; X64-NEXT: movq %rdx, %r9
 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
 ; X64-NEXT: movq %rsi, %rax
-; X64-NEXT: mulq %r14
+; X64-NEXT: mulq %r13
 ; X64-NEXT: movq %rdx, %rdi
 ; X64-NEXT: movq %rax, %rbx
-; X64-NEXT: addq %r11, %rbx
+; X64-NEXT: addq %r9, %rbx
 ; X64-NEXT: adcq $0, %rdi
 ; X64-NEXT: movq %rcx, %rax
-; X64-NEXT: movq %rcx, %r14
-; X64-NEXT: mulq %r13
+; X64-NEXT: movq %rcx, %r9
+; X64-NEXT: mulq %r11
 ; X64-NEXT: movq %rdx, %rcx
 ; X64-NEXT: addq %rbx, %rax
 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT: adcq %rdi, %rcx
 ; X64-NEXT: setb %bl
 ; X64-NEXT: movq %rsi, %rax
-; X64-NEXT: mulq %r13
+; X64-NEXT: movq %rsi, %rdi
+; X64-NEXT: mulq %r11
 ; X64-NEXT: movq %rdx, %r11
-; X64-NEXT: movq %rax, %r13
-; X64-NEXT: addq %rcx, %r13
+; X64-NEXT: movq %rax, %rsi
+; X64-NEXT: addq %rcx, %rsi
 ; X64-NEXT: movzbl %bl, %eax
 ; X64-NEXT: adcq %rax, %r11
-; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
+; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
 ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload
-; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
-; X64-NEXT: adcq %r9, %r11
+; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; X64-NEXT: adcq %r15, %r11
 ; X64-NEXT: adcq $0, %r8
 ; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: adcq $0, %r15
-; X64-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq %r14, %rax
+; X64-NEXT: adcq $0, %r14
+; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq %r9, %rbx
+; X64-NEXT: movq %r9, %rax
 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
 ; X64-NEXT: mulq %rcx
-; X64-NEXT: movq %rdx, %rbx
+; X64-NEXT: movq %rdx, %r15
 ; X64-NEXT: movq %rax, %r9
-; X64-NEXT: movq %rsi, %rax
-; X64-NEXT: movq %rsi, %r15
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: movq %rdi, %r14
 ; X64-NEXT: mulq %rcx
-; X64-NEXT: movq %rdx, %rsi
+; X64-NEXT: movq %rdx, %rdi
 ; X64-NEXT: movq %rax, %rcx
-; X64-NEXT: addq %rbx, %rcx
-; X64-NEXT: adcq $0, %rsi
-; X64-NEXT: movq %r14, %rax
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; X64-NEXT: mulq %rdi
-; X64-NEXT: movq %rdx, %rbx
+; X64-NEXT: addq %r15, %rcx
+; X64-NEXT: adcq $0, %rdi
+; X64-NEXT: movq %rbx, %rax
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT: mulq %rbx
+; X64-NEXT: movq %rdx, %r15
 ; X64-NEXT: movq %rax, %r8
 ; X64-NEXT: addq %rcx, %r8
-; X64-NEXT: adcq %rsi, %rbx
+; X64-NEXT: adcq %rdi, %r15
 ; X64-NEXT: setb %cl
-; X64-NEXT: movq %r15, %rax
-; X64-NEXT: mulq %rdi
-; X64-NEXT: addq %rbx, %rax
+; X64-NEXT: movq %r14, %rax
+; X64-NEXT: mulq %rbx
+; X64-NEXT: addq %r15, %rax
 ; X64-NEXT: movzbl %cl, %ecx
 ; X64-NEXT: adcq %rcx, %rdx
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
-; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
 ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
-; X64-NEXT: addq %rax, %r14
+; X64-NEXT: addq %rax, %r13
 ; X64-NEXT: adcq %rdx, %rcx
-; X64-NEXT: addq %r13, %r9
-; X64-NEXT: movq %r9, %r13
+; X64-NEXT: addq %rsi, %r9
+; X64-NEXT: movq %r9, %r14
 ; X64-NEXT: adcq %r11, %r8
-; X64-NEXT: movq %r8, %r15
-; X64-NEXT: adcq $0, %r14
+; X64-NEXT: movq %r8, %r9
+; X64-NEXT: adcq $0, %r13
 ; X64-NEXT: adcq $0, %rcx
-; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
+; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
 ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
 ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT: setb {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Spill
@@ -7486,7 +7479,7 @@
 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
 ; X64-NEXT: mulq %rsi
 ; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: movq %rax, %r11
+; X64-NEXT: movq %rax, %r15
 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
 ; X64-NEXT: movq %r8, %rax
 ; X64-NEXT: mulq %rsi
@@ -7498,8 +7491,8 @@
 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
 ; X64-NEXT: mulq %rbx
 ; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: movq %rax, %r9
-; X64-NEXT: addq %rdi, %r9
+; X64-NEXT: movq %rax, %r11
+; X64-NEXT: addq %rdi, %r11
 ; X64-NEXT: adcq %rsi, %rcx
 ; X64-NEXT: setb %sil
 ; X64-NEXT: movq %r8, %rax
@@ -7513,631 +7506,626 @@
 ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
 ; X64-NEXT: addq %rax, %rsi
 ; X64-NEXT: adcq %rdx, %rcx
-; X64-NEXT: addq %r14, %r11
-; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload
+; X64-NEXT: addq %r13, %r15
+; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload
 ; X64-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
 ; X64-NEXT: adcq %rax, %rsi
 ; X64-NEXT: adcq $0, %rcx
-; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload
-; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload
+; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
+; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload
 ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
 ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
 ; X64-NEXT: addq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
 ; X64-NEXT: adcq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
-; X64-NEXT: adcq %r12, %r13
-; X64-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: adcq %r10, %r15
-; X64-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: adcq %r12, %r14
+; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: adcq %r10, %r9
+; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
-; X64-NEXT: adcq %rax, %r11
+; X64-NEXT: adcq %rax, %r15
+; X64-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: adcq $0, %r11
 ; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: adcq $0, %r9
-; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT: adcq $0, %rsi
 ; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT: adcq $0, %rcx
 ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
-; X64-NEXT: movq 64(%r9), %r11
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; X64-NEXT: movq %rcx, %rax
-; X64-NEXT: mulq %r11
-; X64-NEXT: movq %rdx, %rsi
-; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT: movq 64(%rsi), %r9
 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
 ; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: mulq %r11
+; X64-NEXT: mulq %r9
+; X64-NEXT: movq %rdx, %rcx
+; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT: movq %r8, %rax
+; X64-NEXT: mulq %r9
 ; X64-NEXT: movq %rdx, %rbp
 ; X64-NEXT: movq %rax, %rbx
-; X64-NEXT: addq %rsi, %rbx
+; X64-NEXT: addq %rcx, %rbx
 ; X64-NEXT: adcq $0, %rbp
-; X64-NEXT: movq 72(%r9), %rsi
-; X64-NEXT: movq %rcx, %rax
-; X64-NEXT: mulq %rsi
-; X64-NEXT: movq %rsi, %rcx
-; X64-NEXT: movq %rdx, %rsi
-; X64-NEXT: movq %rax, %r8
-; X64-NEXT: addq %rbx, %r8
-; X64-NEXT: adcq %rbp, %rsi
-; X64-NEXT: setb %bl
+; X64-NEXT: movq 72(%rsi), %rcx
 ; X64-NEXT: movq %rdi, %rax
 ; X64-NEXT: mulq %rcx
 ; X64-NEXT: movq %rcx, %r13
-; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT: movq %rdx, %rcx
+; X64-NEXT: movq %rax, %r15
+; X64-NEXT: addq %rbx, %r15
+; X64-NEXT: adcq %rbp, %rcx
+; X64-NEXT: setb %bl
+; X64-NEXT: movq %r8, %rax
+; X64-NEXT: mulq %r13
+; X64-NEXT: movq %rdx, %rbp
 ; X64-NEXT: movq %rax, %rdi
-; X64-NEXT: addq %rsi, %rdi
+; X64-NEXT: addq %rcx, %rdi
 ; X64-NEXT: movzbl %bl, %eax
-; X64-NEXT: adcq %rax, %rcx
-; X64-NEXT: movq %r11, %rax
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: mulq %rdx
-; X64-NEXT: movq %rax, %rbx
+; X64-NEXT: adcq %rax, %rbp
+; X64-NEXT: movq %r9, %rax
+; X64-NEXT: xorl %ecx, %ecx
+; X64-NEXT: mulq %rcx
+; X64-NEXT: movq %rax, %r8
 ; X64-NEXT: movq %rdx, %r14
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
-; X64-NEXT: addq %rax, %r10
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
-; X64-NEXT: adcq %rdx, %r15
-; X64-NEXT: addq %rdi, %r10
-; X64-NEXT: adcq %rcx, %r15
 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
-; X64-NEXT: movq %r12, %rax
-; X64-NEXT: movq %r11, %rsi
-; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: mulq %r11
-; X64-NEXT: movq %rdx, %r11
+; X64-NEXT: addq %rax, %r12
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; X64-NEXT: adcq %rdx, %r11
+; X64-NEXT: addq %rdi, %r12
+; X64-NEXT: adcq %rbp, %r11
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT: movq %rcx, %rax
+; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: mulq %r9
+; X64-NEXT: movq %rdx, %r10
 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
-; X64-NEXT: movq %rbp, %rax
-; X64-NEXT: mulq %rsi
-; X64-NEXT: movq %rdx, %rsi
-; X64-NEXT: movq %rax, %rdi
-; X64-NEXT: addq %r11, %rdi
-; X64-NEXT: adcq $0, %rsi
-; X64-NEXT: movq %r12, %rax
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT: movq %rbx, %rax
+; X64-NEXT: mulq %r9
+; X64-NEXT: movq %rdx, %rbp
+; X64-NEXT: movq %rax, %rsi
+; X64-NEXT: addq %r10, %rsi
+; X64-NEXT: adcq $0, %rbp
+; X64-NEXT: movq %rcx, %rax
+; X64-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT: mulq %r13
-; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: addq %rdi, %rax
+; X64-NEXT: movq %rdx, %rdi
+; X64-NEXT: addq %rsi, %rax
 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: adcq %rsi, %rcx
+; X64-NEXT: adcq %rbp, %rdi
 ; X64-NEXT: setb %sil
-; X64-NEXT: movq %rbp, %rax
-; X64-NEXT: movq %rbp, %r11
+; X64-NEXT: movq %rbx, %rax
+; X64-NEXT: movq %rbx, %r10
 ; X64-NEXT: mulq %r13
-; X64-NEXT: addq %rcx, %rax
-; X64-NEXT: movzbl %sil, %ecx
-; X64-NEXT: adcq %rcx, %rdx
-; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
-; X64-NEXT: adcq %r13, %r14
-; X64-NEXT: addq %rax, %rbx
+; X64-NEXT: addq %rdi, %rax
+; X64-NEXT: movzbl %sil, %esi
+; X64-NEXT: adcq %rsi, %rdx
+; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
+; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
+; X64-NEXT: addq %rax, %r8
 ; X64-NEXT: adcq %rdx, %r14
-; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
-; X64-NEXT: adcq %r8, %r14
-; X64-NEXT: adcq $0, %r10
-; X64-NEXT: adcq $0, %r15
-; X64-NEXT: movq 80(%r9), %rdi
-; X64-NEXT: movq %r12, %rax
-; X64-NEXT: mulq %rdi
-; X64-NEXT: movq %rdx, %r8
-; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq %rbp, %rax
-; X64-NEXT: mulq %rdi
-; X64-NEXT: movq %rdx, %rsi
-; X64-NEXT: movq %rax, %rcx
-; X64-NEXT: addq %r8, %rcx
-; X64-NEXT: adcq $0, %rsi
-; X64-NEXT: movq 88(%r9), %r9
-; X64-NEXT: movq %r12, %rax
-; X64-NEXT: mulq %r9
+; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
+; X64-NEXT: adcq %r15, %r14
+; X64-NEXT: adcq $0, %r12
+; X64-NEXT: adcq $0, %r11
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT: movq 80(%rbx), %rsi
+; X64-NEXT: movq %rcx, %rax
+; X64-NEXT: mulq %rsi
+; X64-NEXT: movq %rdx, %r9
+; X64-NEXT: movq %rax, %r13
+; X64-NEXT: movq %r10, %rax
+; X64-NEXT: mulq %rsi
 ; X64-NEXT: movq %rdx, %rbp
-; X64-NEXT: movq %rax, %r8
-; X64-NEXT: addq %rcx, %r8
-; X64-NEXT: adcq %rsi, %rbp
-; X64-NEXT: setb %r12b
-; X64-NEXT: movq %r11, %rax
+; X64-NEXT: movq %rax, %rdi
+; X64-NEXT: addq %r9, %rdi
+; X64-NEXT: adcq $0, %rbp
+; X64-NEXT: movq 88(%rbx), %r9
+; X64-NEXT: movq %rcx, %rax
 ; X64-NEXT: mulq %r9
-; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: movq %rax, %rsi
-; X64-NEXT: addq %rbp, %rsi
-; X64-NEXT: movzbl %r12b, %eax
-; X64-NEXT: adcq %rax, %rcx
-; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: movq %rdx, %rbx
+; X64-NEXT: movq %rax, %rcx
+; X64-NEXT: addq %rdi, %rcx
+; X64-NEXT: adcq %rbp, %rbx
+; X64-NEXT: setb %r15b
+; X64-NEXT: movq %r10, %rax
+; X64-NEXT: mulq %r9
+; X64-NEXT: movq %rdx, %rdi
+; X64-NEXT: movq %rax, %rbp
+; X64-NEXT: addq %rbx, %rbp
+; X64-NEXT: movzbl %r15b, %eax
+; X64-NEXT: adcq %rax, %rdi
+; X64-NEXT: movq %rsi, %rax
 ; X64-NEXT: xorl %edx, %edx
 ; X64-NEXT: mulq %rdx
-; X64-NEXT: movq %rdx, %r12
-; X64-NEXT: movq %rax, %r11
+; X64-NEXT: movq %rdx, %r15
+; X64-NEXT: movq %rax, %r10
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT: addq %rax, %rdx
 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; X64-NEXT: addq %r11, %rax
-; X64-NEXT: adcq %rdx, %r13
-; X64-NEXT: addq %rsi, %rax
-; X64-NEXT: adcq %rcx, %r13
-; X64-NEXT: addq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
-; X64-NEXT: adcq %r14, %r8
-; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: adcq %r15, %rax
+; X64-NEXT: addq %rbp, %rdx
+; X64-NEXT: adcq %rdi, %rax
+; X64-NEXT: addq %r8, %r13
+; X64-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: adcq %r14, %rcx
+; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: adcq $0, %rdx
 ; X64-NEXT: adcq $0, %rax
-; X64-NEXT: adcq $0, %r13
-; X64-NEXT: addq %r10, %rax
-; X64-NEXT: movq %rax, %r8
-; X64-NEXT: adcq %r15, %r13
-; X64-NEXT: setb %r14b
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
-; X64-NEXT: movq %rbp, %rax
-; X64-NEXT: mulq %rdi
+; X64-NEXT: addq %r12, %rdx
+; X64-NEXT: movq %rdx, %r14
+; X64-NEXT: adcq %r11, %rax
+; X64-NEXT: movq %rax, %r12
+; X64-NEXT: setb %r11b
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; X64-NEXT: movq %r13, %rax
+; X64-NEXT: mulq %rsi
 ; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: movq %rax, %r15
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
-; X64-NEXT: movq %r10, %rax
-; X64-NEXT: mulq %rdi
-; X64-NEXT: movq %rdx, %rsi
-; X64-NEXT: movq %rax, %rbx
-; X64-NEXT: addq %rcx, %rbx
-; X64-NEXT: adcq $0, %rsi
-; X64-NEXT: movq %rbp, %rax
+; X64-NEXT: movq %rax, %r8
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT: movq %rbx, %rax
+; X64-NEXT: mulq %rsi
+; X64-NEXT: movq %rdx, %rdi
+; X64-NEXT: movq %rax, %rbp
+; X64-NEXT: addq %rcx, %rbp
+; X64-NEXT: adcq $0, %rdi
+; X64-NEXT: movq %r13, %rax
 ; X64-NEXT: mulq %r9
 ; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: addq %rbx, %rax
+; X64-NEXT: addq %rbp, %rax
 ; X64-NEXT: movq %rax, %rbp
-; X64-NEXT: adcq %rsi, %rcx
-; X64-NEXT: setb %bl
-; X64-NEXT: movq %r10, %rax
+; X64-NEXT: adcq %rdi, %rcx
+; X64-NEXT: setb %dil
+; X64-NEXT: movq %rbx, %rax
 ; X64-NEXT: mulq %r9
 ; X64-NEXT: addq %rcx, %rax
-; X64-NEXT: movzbl %bl, %ecx
+; X64-NEXT: movzbl %dil, %ecx
 ; X64-NEXT: adcq %rcx, %rdx
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; X64-NEXT: addq %r11, %rsi
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT: addq %r10, %rdi
 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; X64-NEXT: adcq %r12, %rcx
-; X64-NEXT: addq %rax, %rsi
+; X64-NEXT: adcq %r15, %rcx
+; X64-NEXT: addq %rax, %rdi
 ; X64-NEXT: adcq %rdx, %rcx
-; X64-NEXT: addq %r8, %r15
-; X64-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: adcq %r13, %rbp
+; X64-NEXT: addq %r14, %r8
+; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: adcq %r12, %rbp
 ; X64-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movzbl %r14b, %eax
-; X64-NEXT: adcq %rax, %rsi
-; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movzbl %r11b, %eax
+; X64-NEXT: adcq %rax, %rdi
+; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT: adcq $0, %rcx
 ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
 ; X64-NEXT: imulq %rax, %r9
-; X64-NEXT: movq %rax, %r10
-; X64-NEXT: mulq %rdi
+; X64-NEXT: movq %rax, %r11
+; X64-NEXT: mulq %rsi
 ; X64-NEXT: movq %rax, %r8
-; X64-NEXT: addq %r9, %rdx
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
-; X64-NEXT: imulq %rbp, %rdi
-; X64-NEXT: addq %rdx, %rdi
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; X64-NEXT: movq %rax, %rsi
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
-; X64-NEXT: imulq %r11, %rsi
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; X64-NEXT: mulq %rcx
-; X64-NEXT: movq %rax, %r9
-; X64-NEXT: addq %rsi, %rdx
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; X64-NEXT: imulq %r10, %rsi
+; X64-NEXT: addq %r9, %rsi
+; X64-NEXT: addq %rdx, %rsi
 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; X64-NEXT: imulq %rcx, %rax
-; X64-NEXT: addq %rdx, %rax
-; X64-NEXT: addq %r8, %r9
-; X64-NEXT: adcq %rdi, %rax
-; X64-NEXT: movq %rax, %r8
-; X64-NEXT: movq %rcx, %rax
-; X64-NEXT: movq %rcx, %rdi
-; X64-NEXT: mulq %r10
+; X64-NEXT: movq %rax, %rdx
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; X64-NEXT: imulq %r14, %rdx
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT: imulq %r9, %rbp
+; X64-NEXT: addq %rdx, %rbp
+; X64-NEXT: mulq %r9
+; X64-NEXT: movq %rax, %rbx
+; X64-NEXT: addq %rdx, %rbp
+; X64-NEXT: addq %r8, %rbx
+; X64-NEXT: adcq %rsi, %rbp
+; X64-NEXT: movq %rbp, %r8
+; X64-NEXT: movq %r9, %rax
+; X64-NEXT: mulq %r11
 ; X64-NEXT: movq %rdx, %rcx
 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq %r11, %rax
-; X64-NEXT: mulq %r10
+; X64-NEXT: movq %r14, %rax
+; X64-NEXT: mulq %r11
 ; X64-NEXT: movq %rdx, %rsi
-; X64-NEXT: movq %rax, %rbx
-; X64-NEXT: addq %rcx, %rbx
+; X64-NEXT: movq %rax, %rbp
+; X64-NEXT: addq %rcx, %rbp
 ; X64-NEXT: adcq $0, %rsi
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: mulq %rbp
+; X64-NEXT: movq %r9, %rax
+; X64-NEXT: mulq %r10
 ; X64-NEXT: movq %rdx, %rdi
-; X64-NEXT: movq %rax, %r15
-; X64-NEXT: addq %rbx, %r15
-; X64-NEXT: adcq %rsi, %rdi
-; X64-NEXT: setb %cl
-; X64-NEXT: movq %r11, %rax
-; X64-NEXT: mulq %rbp
-; X64-NEXT: movq %rdx, %r12
-; X64-NEXT: movq %rax, %r13
-; X64-NEXT: addq %rdi, %r13
-; X64-NEXT: movzbl %cl, %eax
-; X64-NEXT: adcq %rax, %r12
-; X64-NEXT: addq %r9, %r13
-; X64-NEXT: adcq %r8, %r12
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
-; X64-NEXT: movq 120(%rbp), %rcx
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
-; X64-NEXT: imulq %r10, %rcx
-; X64-NEXT: movq 112(%rbp), %rsi
-; X64-NEXT: movq %r10, %rax
-; X64-NEXT: mulq %rsi
 ; X64-NEXT: movq %rax, %r11
-; X64-NEXT: addq %rcx, %rdx
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
-; X64-NEXT: imulq %r8, %rsi
-; X64-NEXT: addq %rdx, %rsi
-; X64-NEXT: movq 96(%rbp), %rdi
-; X64-NEXT: movq 104(%rbp), %rbx
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; X64-NEXT: movq %rax, %rcx
-; X64-NEXT: imulq %rbx, %rcx
-; X64-NEXT: mulq %rdi
-; X64-NEXT: movq %rax, %r9
-; X64-NEXT: addq %rcx, %rdx
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; X64-NEXT: imulq %rdi, %rax
-; X64-NEXT: addq %rdx, %rax
-; X64-NEXT: addq %r11, %r9
-; X64-NEXT: adcq %rsi, %rax
-; X64-NEXT: movq %rax, %r11
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: mulq %r10
-; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: movq %rax, %r14
-; X64-NEXT: movq %rbx, %rax
+; X64-NEXT: addq %rbp, %r11
+; X64-NEXT: adcq %rsi, %rdi
+; X64-NEXT: setb %sil
+; X64-NEXT: movq %r14, %rax
 ; X64-NEXT: mulq %r10
-; X64-NEXT: movq %rdx, %rsi
-; X64-NEXT: movq %rax, %rbp
+; X64-NEXT: movq %rdx, %r15
+; X64-NEXT: movq %rax, %r12
+; X64-NEXT: addq %rdi, %r12
+; X64-NEXT: movzbl %sil, %eax
+; X64-NEXT: adcq %rax, %r15
+; X64-NEXT: addq %rbx, %r12
+; X64-NEXT: adcq %r8, %r15
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT: movq 120(%rsi), %rcx
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT: imulq %rax, %rcx
+; X64-NEXT: movq 112(%rsi), %rbp
+; X64-NEXT: movq %rax, %r8
+; X64-NEXT: mulq %rbp
+; X64-NEXT: movq %rax, %r10
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT: imulq %r9, %rbp
 ; X64-NEXT: addq %rcx, %rbp
-; X64-NEXT: adcq $0, %rsi
+; X64-NEXT: addq %rdx, %rbp
+; X64-NEXT: movq 96(%rsi), %rdi
+; X64-NEXT: movq 104(%rsi), %rsi
+; X64-NEXT: movq %r13, %rax
+; X64-NEXT: imulq %rsi, %rax
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT: imulq %rdi, %rcx
+; X64-NEXT: addq %rax, %rcx
+; X64-NEXT: movq %r13, %rax
+; X64-NEXT: mulq %rdi
+; X64-NEXT: movq %rax, %rbx
+; X64-NEXT: addq %rdx, %rcx
+; X64-NEXT: addq %r10, %rbx
+; X64-NEXT: adcq %rbp, %rcx
+; X64-NEXT: movq %rcx, %r13
 ; X64-NEXT: movq %rdi, %rax
 ; X64-NEXT: mulq %r8
-; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: movq %rax, %rdi
-; X64-NEXT: addq %rbp, %rdi
-; X64-NEXT: adcq %rsi, %rcx
-; X64-NEXT: setb %sil
-; X64-NEXT: movq %rbx, %rax
+; X64-NEXT: movq %rdx, %r10
+; X64-NEXT: movq %rax, %r14
+; X64-NEXT: movq %rsi, %rax
 ; X64-NEXT: mulq %r8
-; X64-NEXT: addq %rcx, %rax
-; X64-NEXT: movzbl %sil, %ecx
+; X64-NEXT: movq %rdx, %rbp
+; X64-NEXT: movq %rax, %rcx
+; X64-NEXT: addq %r10, %rcx
+; X64-NEXT: adcq $0, %rbp
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: mulq %r9
+; X64-NEXT: movq %rdx, %rdi
+; X64-NEXT: movq %rax, %r8
+; X64-NEXT: addq %rcx, %r8
+; X64-NEXT: adcq %rbp, %rdi
+; X64-NEXT: setb %cl
+; X64-NEXT: movq %rsi, %rax
+; X64-NEXT: mulq %r9
+; X64-NEXT: addq %rdi, %rax
+; X64-NEXT: movzbl %cl, %ecx
 ; X64-NEXT: adcq %rcx, %rdx
-; X64-NEXT: addq %r9, %rax
-; X64-NEXT: adcq %r11, %rdx
+; X64-NEXT: addq %rbx, %rax
+; X64-NEXT: adcq %r13, %rdx
 ; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
-; X64-NEXT: adcq %r15, %rdi
-; X64-NEXT: adcq %r13, %rax
-; X64-NEXT: adcq %r12, %rdx
+; X64-NEXT: adcq %r11, %r8
+; X64-NEXT: adcq %r12, %rax
+; X64-NEXT: adcq %r15, %rdx
 ; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
 ; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload
-; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
+; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
 ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; X64-NEXT: movq 80(%r15), %rcx
+; X64-NEXT: movq %rcx, %rax
 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
-; X64-NEXT: movq 80(%r9), %rsi
-; X64-NEXT: movq %rsi, %rax
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; X64-NEXT: mulq %rcx
+; X64-NEXT: mulq %r9
 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq %rdx, %rsi
+; X64-NEXT: movq 88(%r15), %r13
+; X64-NEXT: movq %r13, %rax
+; X64-NEXT: mulq %r9
+; X64-NEXT: movq %r9, %r8
 ; X64-NEXT: movq %rdx, %rdi
-; X64-NEXT: movq 88(%r9), %r8
-; X64-NEXT: movq %r8, %rax
-; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: mulq %rcx
-; X64-NEXT: movq %rcx, %r11
-; X64-NEXT: movq %rdx, %rbp
-; X64-NEXT: movq %rax, %rbx
-; X64-NEXT: addq %rdi, %rbx
-; X64-NEXT: adcq $0, %rbp
-; X64-NEXT: movq %rsi, %rax
-; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
-; X64-NEXT: mulq %r15
-; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: movq %rax, %r14
-; X64-NEXT: addq %rbx, %r14
-; X64-NEXT: adcq %rbp, %rcx
-; X64-NEXT: setb %r10b
-; X64-NEXT: movq %r8, %rax
-; X64-NEXT: mulq %r15
-; X64-NEXT: movq %rdx, %rbp
-; X64-NEXT: movq %rax, %rbx
-; X64-NEXT: addq %rcx, %rbx
-; X64-NEXT: movzbl %r10b, %eax
-; X64-NEXT: adcq %rax, %rbp
-; X64-NEXT: movq %rsi, %rax
+; X64-NEXT: movq %rax, %rbp
+; X64-NEXT: addq %rsi, %rbp
+; X64-NEXT: adcq $0, %rdi
+; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq %rcx, %rax
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; X64-NEXT: mulq %r10
+; X64-NEXT: movq %rdx, %rbx
+; X64-NEXT: addq %rbp, %rax
+; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: adcq %rdi, %rbx
+; X64-NEXT: setb %r9b
+; X64-NEXT: movq %r13, %rax
+; X64-NEXT: mulq %r10
+; X64-NEXT: movq %rdx, %rdi
+; X64-NEXT: movq %rax, %rbp
+; X64-NEXT: addq %rbx, %rbp
+; X64-NEXT: movzbl %r9b, %eax
+; X64-NEXT: adcq %rax, %rdi
+; X64-NEXT: movq %rcx, %rax
 ; X64-NEXT: xorl %ecx, %ecx
 ; X64-NEXT: mulq %rcx
 ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq %rax, %r11
 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq %rax, %rsi
-; X64-NEXT: movq (%rsp), %r12 # 8-byte Reload
-; X64-NEXT: addq %r12, %rsi
-; X64-NEXT: movq %rdx, %r10
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
-; X64-NEXT: adcq %r8, %r10
-; X64-NEXT: addq %rbx, %rsi
-; X64-NEXT: adcq %rbp, %r10
-; X64-NEXT: movq 64(%r9), %r13
-; X64-NEXT: movq %r13, %rax
-; X64-NEXT: mulq %r11
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT: addq %r9, %r11
+; X64-NEXT: movq %rdx, %r12
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; X64-NEXT: adcq %r14, %r12
+; X64-NEXT: addq %rbp, %r11
+; X64-NEXT: adcq %rdi, %r12
+; X64-NEXT: movq 64(%r15), %rax
+; X64-NEXT: movq %rax, %rcx
+; X64-NEXT: mulq %r8
 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: movq 72(%r9), %rdi
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: mulq %r11
+; X64-NEXT: movq %rdx, %rdi
+; X64-NEXT: movq 72(%r15), %r15
+; X64-NEXT: movq %r15, %rax
+; X64-NEXT: mulq %r8
 ; X64-NEXT: movq %rdx, %rbp
 ; X64-NEXT: movq %rax, %rbx
-; X64-NEXT: addq %rcx, %rbx
+; X64-NEXT: addq %rdi, %rbx
 ; X64-NEXT: adcq $0, %rbp
-; X64-NEXT: movq %r13, %rax
-; X64-NEXT: mulq %r15
-; X64-NEXT: movq %rdx, %rcx
+; X64-NEXT: movq %rcx, %rsi
+; X64-NEXT: movq %rcx, %rax
+; X64-NEXT: movq %r10, %rcx
+; X64-NEXT: mulq %r10
+; X64-NEXT: movq %rdx, %rdi
 ; X64-NEXT: addq %rbx, %rax
 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: adcq %rbp, %rcx
-; X64-NEXT: setb %r11b
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: mulq %r15
+; X64-NEXT: adcq %rbp, %rdi
+; X64-NEXT: setb %r10b
+; X64-NEXT: movq %r15, %rax
+; X64-NEXT: mulq %rcx
 ; X64-NEXT: movq %rdx, %rbx
 ; X64-NEXT: movq %rax, %rbp
-; X64-NEXT: addq %rcx, %rbp
-; X64-NEXT: movzbl %r11b, %eax
+; X64-NEXT: addq %rdi, %rbp
+; X64-NEXT: movzbl %r10b, %eax
 ; X64-NEXT: adcq %rax, %rbx
-; X64-NEXT: movq %r13, %rax
+; X64-NEXT: movq %rsi, %rax
 ; X64-NEXT: xorl %ecx, %ecx
 ; X64-NEXT: mulq %rcx
-; X64-NEXT: movq %rdx, %r11
-; X64-NEXT: movq %rax, %r15
-; X64-NEXT: movq %r12, %rcx
-; X64-NEXT: addq %rax, %rcx
-; X64-NEXT: adcq %rdx, %r8
-; X64-NEXT: addq %rbp, %rcx
-; X64-NEXT: adcq %rbx, %r8
-; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
-; X64-NEXT: movq %rcx, (%rsp) # 8-byte Spill
-; X64-NEXT: adcq %r14, %r8
-; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: adcq $0, %rsi
-; X64-NEXT: adcq $0, %r10
-; X64-NEXT: movq %r13, %rax
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
-; X64-NEXT: mulq %r14
-; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: movq %rax, %r12
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: movq %rdi, %r8
-; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: mulq %r14
-; X64-NEXT: movq %rdx, %rdi
-; X64-NEXT: movq %rax, %rbp
-; X64-NEXT: addq %rcx, %rbp
-; X64-NEXT: adcq $0, %rdi
-; X64-NEXT: movq %r13, %rax
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
-; X64-NEXT: mulq %rbx
-; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: addq %rbp, %rax
-; X64-NEXT: movq %rax, %rbp
-; X64-NEXT: adcq %rdi, %rcx
-; X64-NEXT: setb %dil
-; X64-NEXT: movq %r8, %rax
-; X64-NEXT: mulq %rbx
-; X64-NEXT: addq %rcx, %rax
-; X64-NEXT: movzbl %dil, %ecx
-; X64-NEXT: adcq %rcx, %rdx
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
-; X64-NEXT: addq %r9, %r15
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
-; X64-NEXT: adcq %r8, %r11
-; X64-NEXT: addq %rax, %r15
-; X64-NEXT: adcq %rdx, %r11
-; X64-NEXT: addq (%rsp), %r12 # 8-byte Folded Reload
-; X64-NEXT: movq %r12, (%rsp) # 8-byte Spill
-; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
-; X64-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: adcq $0, %r15
-; X64-NEXT: adcq $0, %r11
-; X64-NEXT: addq %rsi, %r15
-; X64-NEXT: adcq %r10, %r11
-; X64-NEXT: setb {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Spill
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; X64-NEXT: movq %rcx, %rax
-; X64-NEXT: movq %r14, %rsi
-; X64-NEXT: mulq %r14
 ; X64-NEXT: movq %rdx, %r10
-; X64-NEXT: movq %rax, %rbp
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; X64-NEXT: movq %rax, %rdi
+; X64-NEXT: addq %rax, %r9
 ; X64-NEXT: movq %r14, %rax
-; X64-NEXT: mulq %rsi
-; X64-NEXT: movq %rdx, %rdi
+; X64-NEXT: adcq %rdx, %rax
+; X64-NEXT: addq %rbp, %r9
+; X64-NEXT: adcq %rbx, %rax
+; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload
+; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: adcq $0, %r11
+; X64-NEXT: adcq $0, %r12
+; X64-NEXT: movq %rsi, %rax
+; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT: mulq %rbp
+; X64-NEXT: movq %rdx, %r8
+; X64-NEXT: movq %rax, %rcx
+; X64-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq %r15, %rax
+; X64-NEXT: mulq %rbp
+; X64-NEXT: movq %rdx, %rbp
 ; X64-NEXT: movq %rax, %rbx
-; X64-NEXT: addq %r10, %rbx
+; X64-NEXT: addq %r8, %rbx
+; X64-NEXT: adcq $0, %rbp
+; X64-NEXT: movq %rsi, %rax
+; X64-NEXT: movq (%rsp), %r8 # 8-byte Reload
+; X64-NEXT: mulq %r8
+; X64-NEXT: movq %rdx, %rsi
+; X64-NEXT: addq %rbx, %rax
+; X64-NEXT: movq %rax, %rbx
+; X64-NEXT: adcq %rbp, %rsi
+; X64-NEXT: setb %bpl
+; X64-NEXT: movq %r15, %rax
+; X64-NEXT: mulq %r8
+; X64-NEXT: addq %rsi, %rax
+; X64-NEXT: movzbl %bpl, %esi
+; X64-NEXT: adcq %rsi, %rdx
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT: addq %r9, %rdi
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; X64-NEXT: adcq %r14, %r10
+; X64-NEXT: addq %rax, %rdi
+; X64-NEXT: adcq %rdx, %r10
+; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
+; X64-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT: adcq $0, %rdi
-; X64-NEXT: movq %rcx, %rax
+; X64-NEXT: adcq $0, %r10
+; X64-NEXT: addq %r11, %rdi
+; X64-NEXT: adcq %r12, %r10
+; X64-NEXT: setb %r15b
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT: movq %rsi, %rax
 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
 ; X64-NEXT: mulq %rcx
-; X64-NEXT: movq %rdx, %r10
-; X64-NEXT: addq %rbx, %rax
+; X64-NEXT: movq %rdx, %r12
+; X64-NEXT: movq %rax, %r11
+; X64-NEXT: movq %r13, %rax
+; X64-NEXT: mulq %rcx
+; X64-NEXT: movq %rdx, %rbx
+; X64-NEXT: movq %rax, %rbp
+; X64-NEXT: addq %r12, %rbp
+; X64-NEXT: adcq $0, %rbx
+; X64-NEXT: movq %rsi, %rax
+; X64-NEXT: mulq %r8
+; X64-NEXT: movq %rdx, %rsi
 ; X64-NEXT: movq %rax, %r12
-; X64-NEXT: adcq %rdi, %r10
+; X64-NEXT: addq %rbp, %r12
+; X64-NEXT: adcq %rbx, %rsi
 ; X64-NEXT: setb %bl
-; X64-NEXT: movq %r14, %rax
-; X64-NEXT: mulq %rcx
-; X64-NEXT: movq %rcx, %rdi
-; X64-NEXT: addq %r10, %rax
-; X64-NEXT: movzbl %bl, %ecx
-; X64-NEXT: adcq %rcx, %rdx
+; X64-NEXT: movq %r13, %rax
+; X64-NEXT: mulq %r8
+; X64-NEXT: addq %rsi, %rax
+; X64-NEXT: movzbl %bl, %esi
+; X64-NEXT: adcq %rsi, %rdx
 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
 ; X64-NEXT: addq %r9, %rbx
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; X64-NEXT: adcq %r8, %rcx
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT: adcq %r14, %rsi
 ; X64-NEXT: addq %rax, %rbx
-; X64-NEXT: adcq %rdx, %rcx
-; X64-NEXT: addq %r15, %rbp
-; X64-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: adcq %r11, %r12
-; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; X64-NEXT: adcq %rdx, %rsi
+; X64-NEXT: addq %rdi, %r11
+; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: adcq %r10, %r12
+; X64-NEXT: movzbl %r15b, %eax
 ; X64-NEXT: adcq %rax, %rbx
 ; X64-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: adcq $0, %rcx
-; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
-; X64-NEXT: movq 96(%rbp), %rcx
-; X64-NEXT: imulq %rcx, %rdi
-; X64-NEXT: movq %rcx, %rax
-; X64-NEXT: mulq %rsi
-; X64-NEXT: movq %rax, %r9
-; X64-NEXT: addq %rdi, %rdx
-; X64-NEXT: movq 104(%rbp), %r8
-; X64-NEXT: imulq %r8, %rsi
-; X64-NEXT: addq %rdx, %rsi
-; X64-NEXT: movq %rsi, %r11
-; X64-NEXT: movq 112(%rbp), %rax
-; X64-NEXT: movq %rbp, %rdi
-; X64-NEXT: movq %rax, %rsi
+; X64-NEXT: adcq $0, %rsi
+; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT: movq 96(%rdi), %rsi
+; X64-NEXT: imulq %rsi, %r8
+; X64-NEXT: movq %rsi, %rax
+; X64-NEXT: movq %rcx, %r9
+; X64-NEXT: mulq %rcx
+; X64-NEXT: movq %rax, %r11
+; X64-NEXT: movq 104(%rdi), %r10
+; X64-NEXT: imulq %r10, %r9
+; X64-NEXT: addq %r8, %r9
+; X64-NEXT: addq %rdx, %r9
+; X64-NEXT: movq 112(%rdi), %rax
+; X64-NEXT: movq %rax, %rcx
 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
-; X64-NEXT: imulq %rbp, %rsi
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
-; X64-NEXT: mulq %rbx
-; X64-NEXT: movq %rax, %r10
-; X64-NEXT: addq %rsi, %rdx
+; X64-NEXT: imulq %rbp, %rcx
 ; X64-NEXT: movq 120(%rdi), %rdi
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
 ; X64-NEXT: imulq %rbx, %rdi
+; X64-NEXT: addq %rcx, %rdi
+; X64-NEXT: mulq %rbx
+; X64-NEXT: movq %rax, %r8
 ; X64-NEXT: addq %rdx, %rdi
-; X64-NEXT: addq %r9, %r10
-; X64-NEXT: adcq %r11, %rdi
+; X64-NEXT: addq %r11, %r8
+; X64-NEXT: adcq %r9, %rdi
 ; X64-NEXT: movq %rbx, %rax
-; X64-NEXT: movq %rbx, %rsi
-; X64-NEXT: mulq %rcx
-; X64-NEXT: movq %rdx, %rbx
+; X64-NEXT: movq %rbx, %r9
+; X64-NEXT: mulq %rsi
+; X64-NEXT: movq %rdx, %rcx
 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT: movq %rbp, %rax
-; X64-NEXT: movq %rbp, %r9
-; X64-NEXT: mulq %rcx
-; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: movq %rax, %rbp
-; X64-NEXT: addq %rbx, %rbp
-; X64-NEXT: adcq $0, %rcx
-; X64-NEXT: movq %rsi, %rax
-; X64-NEXT: mulq %r8
+; X64-NEXT: mulq %rsi
 ; X64-NEXT: movq %rdx, %rsi
-; X64-NEXT: movq %rax, %r12
-; X64-NEXT: addq %rbp, %r12
-; X64-NEXT: adcq %rcx, %rsi
-; X64-NEXT: setb %cl
+; X64-NEXT: movq %rax, %rbx
+; X64-NEXT: addq %rcx, %rbx
+; X64-NEXT: adcq $0, %rsi
 ; X64-NEXT: movq %r9, %rax
-; X64-NEXT: mulq %r8
-; X64-NEXT: movq %rdx, %rbx
-; X64-NEXT: movq %rax, %rbp
-; X64-NEXT: addq %rsi, %rbp
-; X64-NEXT: movzbl %cl, %eax
-; X64-NEXT: adcq %rax, %rbx
-; X64-NEXT: addq %r10, %rbp
-; X64-NEXT: adcq %rdi, %rbx
+; X64-NEXT: mulq %r10
+; X64-NEXT: movq %rdx, %rcx
+; X64-NEXT: movq %rax, %r15
+; X64-NEXT: addq %rbx, %r15
+; X64-NEXT: adcq %rsi, %rcx
+; X64-NEXT: setb %bl
+; X64-NEXT: movq %rbp, %rax
+; X64-NEXT: mulq %r10
+; X64-NEXT: movq %rdx, %rbp
+; X64-NEXT: movq %rax, %r10
+; X64-NEXT: addq %rcx, %r10
+; X64-NEXT: movzbl %bl, %eax
+; X64-NEXT: adcq %rax, %rbp
+; X64-NEXT: addq %r8, %r10
+; X64-NEXT: adcq %rdi, %rbp
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT: imulq %rax, %rcx
+; X64-NEXT: movq %rax, %r9
 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; X64-NEXT: imulq %r13, %rsi
-; X64-NEXT: movq %r13, %rax
+; X64-NEXT: mulq %rsi
+; X64-NEXT: movq %rax, %rbx
 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
-; X64-NEXT: mulq %r8
-; X64-NEXT: movq %rax, %r9
-; X64-NEXT: addq %rsi, %rdx
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
-; X64-NEXT: imulq %r11, %r8
-; X64-NEXT: addq %rdx, %r8
+; X64-NEXT: imulq %r8, %rsi
+; X64-NEXT: addq %rcx, %rsi
+; X64-NEXT: addq %rdx, %rsi
+; X64-NEXT: movq %rsi, %rdi
 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; X64-NEXT: movq %rax, %rcx
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
-; X64-NEXT: imulq %r15, %rcx
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; X64-NEXT: mulq %rdi
-; X64-NEXT: movq %rax, %r10
-; X64-NEXT: addq %rcx, %rdx
-; X64-NEXT: movq %r14, %rax
-; X64-NEXT: imulq %rdi, %rax
-; X64-NEXT: addq %rdx, %rax
-; X64-NEXT: addq %r9, %r10
-; X64-NEXT: adcq %r8, %rax
-; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: movq %rdi, %r14
-; X64-NEXT: mulq %r13
-; X64-NEXT: movq %rdx, %rdi
-; X64-NEXT: movq %rax, %r8
-; X64-NEXT: movq %r15, %rax
-; X64-NEXT: mulq %r13
+; X64-NEXT: movq %rax, %rdx
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT: imulq %rcx, %rdx
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT: imulq %rsi, %r13
+; X64-NEXT: addq %rdx, %r13
+; X64-NEXT: mulq %rsi
+; X64-NEXT: movq %rax, %r14
+; X64-NEXT: addq %rdx, %r13
+; X64-NEXT: addq %rbx, %r14
+; X64-NEXT: adcq %rdi, %r13
+; X64-NEXT: movq %rsi, %rax
+; X64-NEXT: mulq %r9
+; X64-NEXT: movq %rdx, %rbx
+; X64-NEXT: movq %rax, %r11
+; X64-NEXT: movq %rcx, %rax
+; X64-NEXT: movq %rcx, %rdi
+; X64-NEXT: mulq %r9
 ; X64-NEXT: movq %rdx, %r9
 ; X64-NEXT: movq %rax, %rcx
-; X64-NEXT: addq %rdi, %rcx
+; X64-NEXT: addq %rbx, %rcx
 ; X64-NEXT: adcq $0, %r9
-; X64-NEXT: movq %r14, %rax
-; X64-NEXT: mulq %r11
-; X64-NEXT: movq %rdx, %rdi
-; X64-NEXT: movq %rax, %rsi
-; X64-NEXT: addq %rcx, %rsi
-; X64-NEXT: adcq %r9, %rdi
+; X64-NEXT: movq %rsi, %rax
+; X64-NEXT: mulq %r8
+; X64-NEXT: movq %rdx, %rsi
+; X64-NEXT: movq %rax, %rbx
+; X64-NEXT: addq %rcx, %rbx
+; X64-NEXT: adcq %r9, %rsi
 ; X64-NEXT: setb %cl
-; X64-NEXT: movq %r15, %rax
-; X64-NEXT: mulq %r11
-; X64-NEXT: addq %rdi, %rax
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: mulq %r8
+; X64-NEXT: addq %rsi, %rax
 ; X64-NEXT: movzbl %cl, %ecx
 ; X64-NEXT: adcq %rcx, %rdx
-; X64-NEXT: addq %r10, %rax
-; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
-; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
-; X64-NEXT: adcq %r12, %rsi
-; X64-NEXT: adcq %rbp, %rax
-; X64-NEXT: adcq %rbx, %rdx
-; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
-; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; X64-NEXT: addq %r14, %rax
+; X64-NEXT: adcq %r13, %rdx
+; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload
+; X64-NEXT: adcq %r15, %rbx
+; X64-NEXT: adcq %r10, %rax
+; X64-NEXT: adcq %rbp, %rdx
+; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload
+; X64-NEXT: adcq %r12, %rbx
 ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
 ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
 ; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
 ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload
-; X64-NEXT: movq (%rsp), %rbp # 8-byte Reload
-; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload
 ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
-; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
-; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
 ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
 ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
 ; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
-; X64-NEXT: movq %rcx, %r9
-; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload
-; X64-NEXT: movq %rdi, %r10
+; X64-NEXT: movq %rcx, %r8
+; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; X64-NEXT: movq %rsi, %r9
 ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
+; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload
+; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload
 ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
-; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
-; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
 ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
 ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; X64-NEXT: movq %rdi, (%rcx)
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; X64-NEXT: movq %rdi, 8(%rcx)
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; X64-NEXT: movq %rdi, 16(%rcx)
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; X64-NEXT: movq %rdi, 24(%rcx)
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; X64-NEXT: movq %rdi, 32(%rcx)
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; X64-NEXT: movq %rdi, 40(%rcx)
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; X64-NEXT: movq %rdi, 48(%rcx)
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; X64-NEXT: movq %rdi, 56(%rcx)
-; X64-NEXT: movq %r9, 64(%rcx)
-; X64-NEXT: movq %r10, 72(%rcx)
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT: movq %rsi, (%rcx)
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT: movq %rsi, 8(%rcx)
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT: movq %rsi, 16(%rcx)
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT: movq %rsi, 24(%rcx)
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT: movq %rsi, 32(%rcx)
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT: movq %rsi, 40(%rcx)
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT: movq %rsi, 48(%rcx)
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT: movq %rsi, 56(%rcx)
+; X64-NEXT: movq %r8, 64(%rcx)
+; X64-NEXT: movq %r9, 72(%rcx)
 ; X64-NEXT: movq %rbp, 80(%rcx)
-; X64-NEXT: movq %rbx, 88(%rcx)
-; X64-NEXT: movq %r8, 96(%rcx)
-; X64-NEXT: movq %rsi, 104(%rcx)
+; X64-NEXT: movq %rdi, 88(%rcx)
+; X64-NEXT: movq %r11, 96(%rcx)
+; X64-NEXT: movq %rbx, 104(%rcx)
 ; X64-NEXT: movq %rax, 112(%rcx)
 ; X64-NEXT: movq %rdx, 120(%rcx)
 ; X64-NEXT: addq $352, %rsp # imm = 0x160
Index: test/CodeGen/X86/mul-i256.ll
===================================================================
--- test/CodeGen/X86/mul-i256.ll
+++ test/CodeGen/X86/mul-i256.ll
@@ -15,8 +15,8 @@
 ; X32-NEXT: .cfi_def_cfa_offset 16
 ; X32-NEXT: pushl %esi
 ; X32-NEXT: .cfi_def_cfa_offset 20
-; X32-NEXT: subl $88, %esp
-; X32-NEXT: .cfi_def_cfa_offset 108
+; X32-NEXT: subl $84, %esp
+; X32-NEXT: .cfi_def_cfa_offset 104
 ; X32-NEXT: .cfi_offset %esi, -20
 ; X32-NEXT: .cfi_offset %edi, -16
 ; X32-NEXT: .cfi_offset %ebx, -12
@@ -31,7 +31,7 @@
 ; X32-NEXT: movl %edi, %eax
 ; X32-NEXT: mull %ebx
 ; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
 ; X32-NEXT: movl %ebp, %eax
 ; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT: mull %ebx
@@ -53,157 +53,155 @@
 ; X32-NEXT: movl %ebp, %eax
 ; X32-NEXT: mull %edi
 ; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
 ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
 ; X32-NEXT: adcl %eax, %ebx
-; X32-NEXT: xorl %edx, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull %edx
-; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: xorl %ecx, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ecx
 ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT: xorl %edx, %edx
 ; X32-NEXT: mull %edx
 ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: adcl %ebp, %edx
 ; X32-NEXT: addl %edi, %eax
+; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: addl %ebp, %eax
 ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT: adcl %ebx, %edx
 ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: movl (%esi), %ebp
-; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %ecx, %edi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl (%ecx), %ebx
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %esi, %edi
 ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl 4(%esi), %esi
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl 4(%ecx), %esi
 ; X32-NEXT: movl %esi, %eax
 ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: mull %ebp
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %ebp
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: addl %edi, %eax
 ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
-; X32-NEXT: setb %bl
+; X32-NEXT: adcl %ebp, %ebx
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
 ; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %ebp
+; X32-NEXT: mull %ecx
 ; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: addl %ecx, %ebp
-; X32-NEXT: movzbl %bl, %eax
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: addl %ebx, %esi
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
 ; X32-NEXT: adcl %eax, %edi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl %ecx, %eax
 ; X32-NEXT: xorl %edx, %edx
 ; X32-NEXT: mull %edx
 ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl (%esp), %ecx # 4-byte Reload
-; X32-NEXT: addl %eax, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: addl %eax, %ebx
 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT: adcl %edx, %eax
-; X32-NEXT: addl %ebp, %ecx
+; X32-NEXT: addl %esi, %ebx
 ; X32-NEXT: adcl %edi, %eax
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill
+; X32-NEXT: addl (%esp), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
 ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X32-NEXT: movl 8(%edi), %ebx
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl %esi, %ecx
-; X32-NEXT: mull %ebx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: movl 8(%eax), %ebp
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: mull %ebp
+; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
 ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %ebp
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl (%esp), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl 12(%ebx), %edx
+; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ebx
-; X32-NEXT: movl %edx, %ebp
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %ebp
-; X32-NEXT: movl 12(%edi), %edi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edi, %ecx
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: mull %edx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: addl %ecx, %eax
 ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebp, %edi
+; X32-NEXT: adcl %esi, %ebx
 ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: addl %edi, %ebp
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull (%esp) # 4-byte Folded Reload
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: addl %ebx, %esi
 ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
-; X32-NEXT: adcl %eax, %esi
-; X32-NEXT: movl %ebx, %edi
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: xorl %ecx, %ecx
-; X32-NEXT: mull %ecx
+; X32-NEXT: adcl %eax, %ecx
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: xorl %edx, %edx
+; X32-NEXT: mull %edx
 ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: addl %eax, %ebx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: addl %eax, %edi
 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT: adcl %edx, %eax
-; X32-NEXT: addl %ebp, %ebx
-; X32-NEXT: adcl %esi, %eax
-; X32-NEXT: movl (%esp), %ecx
# 4-byte Reload +; X32-NEXT: addl %esi, %edi +; X32-NEXT: adcl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: adcl $0, %ebx +; X32-NEXT: adcl $0, %edi ; X32-NEXT: adcl $0, %eax -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: setb (%esp) # 1-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ebx +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl %ebx, %ebp -; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: mull %ebx +; X32-NEXT: mull %ebp +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax +; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %esi -; X32-NEXT: addl %ebp, %eax -; X32-NEXT: movl %eax, %ebp -; X32-NEXT: adcl %edi, %esi -; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: mull %ebx -; X32-NEXT: addl %esi, %eax -; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload -; X32-NEXT: adcl %esi, %edx +; X32-NEXT: mull (%esp) # 4-byte Folded Reload +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: addl %ebx, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %esi, %ecx +; X32-NEXT: setb %bl +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl (%esp), %edi # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: addl %ecx, %eax +; X32-NEXT: movzbl %bl, %ecx +; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload @@ -212,143 +210,141 @@ ; X32-NEXT: adcl %edx, %ecx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %esi ; 
X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl 16(%ecx), %esi -; X32-NEXT: imull %esi, %ebx -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: addl %ebx, %edx +; X32-NEXT: movl 16(%ecx), %ebx +; X32-NEXT: movl %edi, %esi +; X32-NEXT: imull %ebx, %esi +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %ebp +; X32-NEXT: movl %eax, (%esp) # 4-byte Spill ; X32-NEXT: movl 20(%ecx), %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: imull %eax, %edi -; X32-NEXT: addl %edx, %edi -; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: imull %eax, %ebp +; X32-NEXT: addl %esi, %ebp +; X32-NEXT: addl %edx, %ebp ; X32-NEXT: movl 24(%ecx), %eax -; X32-NEXT: movl %ecx, %ebp -; X32-NEXT: movl %eax, %edi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: imull %ecx, %edi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %eax, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: imull %esi, %edx +; X32-NEXT: movl 28(%ecx), %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: imull %edi, %ecx +; X32-NEXT: addl %edx, %ecx +; X32-NEXT: mull %edi +; X32-NEXT: addl %edx, %ecx +; X32-NEXT: addl (%esp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ebp, %ecx +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl %edi, %ebp ; X32-NEXT: mull %ebx +; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-NEXT: addl %edi, %edx -; X32-NEXT: movl 28(%ebp), %ebp -; X32-NEXT: imull %ebx, %ebp -; X32-NEXT: addl %edx, %ebp -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NEXT: addl %edx, (%esp) # 4-byte Folded Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ebx, %edi -; X32-NEXT: adcl $0, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %esi -; X32-NEXT: addl %edi, %eax +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %edi, %ebx +; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: mull %ebp +; X32-NEXT: movl %edx, %edi +; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %ecx, %esi -; X32-NEXT: setb %cl +; X32-NEXT: adcl %esi, %edi +; X32-NEXT: setb %bl ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %ebx -; X32-NEXT: addl %esi, %eax -; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: mull %ebp +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movzbl %bl, %esi +; X32-NEXT: adcl %esi, %edx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %ecx, %edx -; 
X32-NEXT: addl (%esp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-NEXT: adcl %ebp, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NEXT: movl 28(%ebx), %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: imull %esi, %ecx -; X32-NEXT: movl 24(%ebx), %edi -; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %edi +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi +; X32-NEXT: movl 28(%edi), %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: imull %ebp, %ecx +; X32-NEXT: movl 24(%edi), %esi +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: addl %ecx, %edx -; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT: addl %edx, %edi -; X32-NEXT: movl 16(%ebx), %ebp -; X32-NEXT: movl 20(%ebx), %ebx +; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: addl %ecx, %esi +; X32-NEXT: addl %edx, %esi +; X32-NEXT: movl 16(%edi), %ecx +; X32-NEXT: movl 20(%edi), %ebx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: imull %ebx, %ecx +; X32-NEXT: movl %eax, %edx +; X32-NEXT: imull %ebx, %edx ; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: mull %ebp -; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: imull %ebp, %ecx -; X32-NEXT: addl %edx, %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: imull %ecx, %edi +; X32-NEXT: addl %edx, %edi +; X32-NEXT: mull %ecx +; X32-NEXT: addl %edx, %edi ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: mull %esi +; X32-NEXT: adcl %esi, %edi +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %edi, %ebx -; X32-NEXT: adcl $0, %ecx -; X32-NEXT: movl %ebp, %eax +; X32-NEXT: mull %ebp +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl %eax, %esi +; X32-NEXT: addl %edi, %esi +; X32-NEXT: adcl $0, %ebx +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %esi -; X32-NEXT: addl %ebx, %esi -; X32-NEXT: adcl %ecx, %edi -; X32-NEXT: setb %cl +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: addl %esi, %ecx +; X32-NEXT: adcl %ebx, %edi +; X32-NEXT: setb %bl ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ebp ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movzbl %cl, %ecx -; X32-NEXT: adcl %ecx, %edx +; X32-NEXT: movzbl %bl, %esi +; X32-NEXT: adcl %esi, %edx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT: 
adcl (%esp), %eax # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: addl (%esp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, %ebx -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, %ebx +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, (%ecx) +; X32-NEXT: movl %edi, (%esi) ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, 4(%ecx) +; X32-NEXT: movl %edi, 4(%esi) ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, 8(%ecx) +; X32-NEXT: movl %edi, 8(%esi) ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, 12(%ecx) -; X32-NEXT: movl %ebx, 16(%ecx) -; X32-NEXT: movl %esi, 20(%ecx) -; X32-NEXT: movl %eax, 24(%ecx) -; X32-NEXT: movl %edx, 28(%ecx) -; X32-NEXT: addl $88, %esp +; X32-NEXT: movl %edi, 12(%esi) +; X32-NEXT: movl %ebx, 16(%esi) +; X32-NEXT: movl %ecx, 20(%esi) +; X32-NEXT: movl %eax, 24(%esi) +; X32-NEXT: movl %edx, 28(%esi) +; X32-NEXT: addl $84, %esp ; X32-NEXT: .cfi_def_cfa_offset 20 ; X32-NEXT: popl %esi ; X32-NEXT: .cfi_def_cfa_offset 16 @@ -366,9 +362,12 @@ ; X64-NEXT: .cfi_def_cfa_offset 16 ; X64-NEXT: pushq %r14 ; X64-NEXT: .cfi_def_cfa_offset 24 -; X64-NEXT: pushq %rbx +; X64-NEXT: pushq %r12 ; X64-NEXT: .cfi_def_cfa_offset 32 -; X64-NEXT: .cfi_offset %rbx, -32 +; X64-NEXT: pushq %rbx +; X64-NEXT: .cfi_def_cfa_offset 40 +; X64-NEXT: .cfi_offset %rbx, -40 +; X64-NEXT: .cfi_offset %r12, -32 ; X64-NEXT: .cfi_offset %r14, -24 ; X64-NEXT: .cfi_offset %r15, -16 ; X64-NEXT: movq %rdx, %r9 @@ -376,33 +375,33 @@ ; X64-NEXT: movq 8(%rdi), %r8 ; X64-NEXT: movq 16(%rdi), %rbx ; X64-NEXT: movq 16(%rsi), %r10 -; X64-NEXT: movq (%rsi), %rcx +; X64-NEXT: movq (%rsi), %r12 ; X64-NEXT: movq 8(%rsi), %r15 ; X64-NEXT: movq 24(%rdi), %rdi -; X64-NEXT: imulq %rcx, %rdi -; X64-NEXT: movq %rcx, %rax +; X64-NEXT: imulq %r12, %rdi +; X64-NEXT: movq %r12, %rax ; X64-NEXT: mulq %rbx ; X64-NEXT: movq %rax, %r14 -; X64-NEXT: addq %rdi, %rdx ; X64-NEXT: imulq %r15, %rbx +; X64-NEXT: addq %rdi, %rbx ; X64-NEXT: addq %rdx, %rbx -; X64-NEXT: movq %r10, %rdi -; X64-NEXT: imulq %r8, %rdi +; X64-NEXT: movq %r10, %rcx +; X64-NEXT: imulq %r8, %rcx ; X64-NEXT: movq %r10, %rax ; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rax, %r10 -; X64-NEXT: addq %rdi, %rdx ; X64-NEXT: movq 24(%rsi), %rdi ; X64-NEXT: imulq %r11, %rdi +; X64-NEXT: addq %rcx, %rdi ; X64-NEXT: addq %rdx, %rdi ; X64-NEXT: addq %r14, %r10 ; X64-NEXT: adcq %rbx, %rdi ; X64-NEXT: movq %r11, %rax -; X64-NEXT: mulq %rcx +; X64-NEXT: mulq %r12 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %r14 ; X64-NEXT: movq %r8, %rax -; X64-NEXT: mulq %rcx +; X64-NEXT: mulq %r12 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rsi, %rbx @@ -426,6 +425,8 @@ 
; X64-NEXT: movq %rax, 16(%r9) ; X64-NEXT: movq %rdx, 24(%r9) ; X64-NEXT: popq %rbx +; X64-NEXT: .cfi_def_cfa_offset 32 +; X64-NEXT: popq %r12 ; X64-NEXT: .cfi_def_cfa_offset 24 ; X64-NEXT: popq %r14 ; X64-NEXT: .cfi_def_cfa_offset 16 Index: test/CodeGen/X86/mul-i512.ll =================================================================== --- test/CodeGen/X86/mul-i512.ll +++ test/CodeGen/X86/mul-i512.ll @@ -10,44 +10,43 @@ ; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi ; X32-NEXT: subl $244, %esp -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl 20(%ecx), %edi -; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl 16(%ecx), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movl 20(%edx), %ecx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl 16(%edx), %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ecx, %ebp +; X32-NEXT: movl %edx, %edi ; X32-NEXT: xorl %ebx, %ebx ; X32-NEXT: mull %ebx -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: movl %eax, %esi -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %ebx -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ecx, %edi +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: addl %ebp, %ecx ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: addl %esi, %edi +; X32-NEXT: addl %esi, %ecx ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %ecx, %ebx -; X32-NEXT: movl %ecx, %edi ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ebp, %ebx +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: setb %cl ; X32-NEXT: addl %eax, %ebx ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl 24(%ebp), %eax +; X32-NEXT: movl 24(%edi), %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: mull %edx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: addl %eax, %esi -; X32-NEXT: adcl %edx, %edi +; X32-NEXT: adcl %edx, %ebp ; X32-NEXT: addl %ebx, %esi ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %ecx, %edi -; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ecx, %ebp +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl (%ecx), %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -65,7 +64,7 @@ ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: addl %ebp, %ebx -; X32-NEXT: movl %ebx, (%esp) # 4-byte Spill +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: addl %eax, %ecx @@ -108,7 +107,7 @@ ; X32-NEXT: movl 16(%eax), %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: mull %ebp -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, (%esp) # 4-byte Spill ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ecx, %edi ; X32-NEXT: movl %ecx, %ebp @@ -121,7 +120,7 @@ ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: adcl (%esp), %eax # 4-byte Folded Reload +; X32-NEXT: adcl 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload @@ -163,7 +162,7 @@ ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, (%esp) # 4-byte Folded Spill +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi @@ -176,7 +175,7 @@ ; X32-NEXT: addl %edi, %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl (%esp), %ebp # 4-byte Reload ; X32-NEXT: addl %ebp, %ebx ; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx @@ -257,80 +256,82 @@ ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx +; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: addl %edi, %ebp ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %ebx, %ecx -; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl %edi, %ebp -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ecx, %ebx -; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X32-NEXT: adcl %eax, %edi -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: setb %bl +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %edi +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %ecx, %ebp +; X32-NEXT: movzbl %bl, %eax +; X32-NEXT: adcl %eax, %edx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; 
X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl %ecx, %ebp -; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 12(%eax), %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ecx +; X32-NEXT: movl %ecx, %esi +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebp, %eax +; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: adcl %edi, %ecx +; X32-NEXT: setb %bl ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X32-NEXT: mull %esi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: addl %eax, %ebp +; X32-NEXT: addl %eax, %ebx ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: adcl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: adcl $0, %ebp +; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, %ebx ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -341,102 +342,102 @@ ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: 
movl %ebx, %eax +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: addl %ebx, %eax +; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %esi, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi -; X32-NEXT: adcl %edx, %ebx +; X32-NEXT: adcl %edx, %ebp ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NEXT: addl %ebp, %edx +; X32-NEXT: addl %ebx, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: adcl %ecx, %esi ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi -; X32-NEXT: adcl $0, %ebx +; X32-NEXT: adcl $0, %ebp ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl 12(%eax), %eax -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl 12(%eax), %esi +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl %esi, %ebp +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %ebp, %ebx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: mull %ebx -; X32-NEXT: movl %edx, %esi -; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %edi, %esi -; X32-NEXT: setb %cl -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %ebx -; 
X32-NEXT: addl %esi, %eax -; X32-NEXT: movzbl %cl, %ecx -; X32-NEXT: adcl %ecx, %edx +; X32-NEXT: adcl %edi, %ebp +; X32-NEXT: setb %bl +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %ecx +; X32-NEXT: movl %ecx, %esi +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movzbl %bl, %edi +; X32-NEXT: adcl %edi, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx +; X32-NEXT: adcl %edx, %edi +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: mull %ebx +; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ebp ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl $0, %ebx +; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl %ebx, %edi -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: adcl %ebx, %edi +; X32-NEXT: setb %bl +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl %ecx, %ebp -; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT: addl %edi, %ebp +; X32-NEXT: movzbl %bl, %eax ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload @@ -445,192 +446,192 @@ ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: mull %ebx +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %ebx -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, 
%ebx -; X32-NEXT: addl %ecx, %ebx -; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ecx, %edi +; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %esi -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %edi, %esi -; X32-NEXT: setb %bl +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movl %eax, %edi +; X32-NEXT: adcl %esi, %ebx +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: addl %esi, %eax -; X32-NEXT: movzbl %bl, %ecx +; X32-NEXT: addl %ebx, %eax +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: addl %eax, %ebx -; X32-NEXT: adcl %edx, %ecx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: addl %eax, %ecx +; X32-NEXT: adcl %edx, %esi ; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: adcl $0, %ebx +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl $0, %esi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: mull %edi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ebp -; X32-NEXT: addl %edi, %eax +; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte 
Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: setb %bl ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: mull %edi ; X32-NEXT: addl %ebp, %eax -; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload -; X32-NEXT: adcl %esi, %edx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movzbl %bl, %edi +; X32-NEXT: adcl %edi, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT: addl %eax, %ebp -; X32-NEXT: adcl %edx, %edi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: addl %eax, %edi +; X32-NEXT: adcl %edx, %ebx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NEXT: addl %ebx, %edx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: adcl %ecx, %esi +; X32-NEXT: addl %ecx, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X32-NEXT: adcl %eax, %ebp -; X32-NEXT: adcl $0, %edi +; X32-NEXT: adcl %eax, %edi +; X32-NEXT: adcl $0, %ebx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT: adcl (%esp), %esi # 4-byte Folded Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte 
Reload ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %esi, %ecx -; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: mull %ebx -; X32-NEXT: movl %edx, %esi +; X32-NEXT: addl %edi, %ecx +; X32-NEXT: adcl $0, %ebx +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %edi, %esi +; X32-NEXT: adcl %ebx, %edi ; X32-NEXT: setb %cl ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: mull %ebx -; X32-NEXT: addl %esi, %eax +; X32-NEXT: mull %esi +; X32-NEXT: addl %edi, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: addl (%esp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: movl %ebp, %eax +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ecx, %edi -; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %edi, %eax +; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %esi, %ecx +; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: mull %ebx -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ecx, %edi +; X32-NEXT: mull %esi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %ecx, %ebx ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT: addl (%esp), %edi # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; 
X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl (%esp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl $0, %esi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 28(%eax), %ebp -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ebp -; X32-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %esi, %ebx +; X32-NEXT: adcl %edi, %esi ; X32-NEXT: setb %cl ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ebp -; X32-NEXT: addl %ebx, %eax +; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload @@ -639,7 +640,7 @@ ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ebp ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ebp @@ -647,30 +648,30 @@ ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl (%esp), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: addl %ebx, %eax +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: adcl $0, %edx +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %edi, %edx -; X32-NEXT: movl %edx, %edi -; X32-NEXT: setb %bl +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte 
Folded Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movzbl %bl, %esi +; X32-NEXT: addl %ebx, %eax +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %esi, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload @@ -710,97 +711,96 @@ ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ecx, %edi -; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %eax, %esi +; X32-NEXT: addl %ecx, %esi +; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %edi, %eax +; X32-NEXT: addl %esi, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %ebx, %ecx -; X32-NEXT: setb %bl +; X32-NEXT: adcl %edi, %ecx +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: mull %esi +; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl %bl, %ecx +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl (%esp), %esi # 4-byte Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %esi, (%esp) # 4-byte Spill ; X32-NEXT: adcl %edx, %ecx ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %esi, %ebx -; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte 
Reload ; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ecx, %edi +; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebx, %eax +; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %edi, %ecx +; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl %ebp, %edi -; X32-NEXT: mull %esi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: mull %ebx +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ecx, %edi ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %esi, %ecx -; X32-NEXT: adcl $0, %edi +; X32-NEXT: addl %ebx, %ecx +; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl (%esp), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %edi, %ebp +; X32-NEXT: adcl %esi, %ebp ; X32-NEXT: setb %cl ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: mull %ebx ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx @@ -810,16 +810,16 @@ ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: addl (%esp), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: setb 
{{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: setb (%esp) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: movl %ebp, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload @@ -833,7 +833,6 @@ ; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl (%esp), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, %ebp @@ -854,7 +853,7 @@ ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %esi ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload @@ -915,7 +914,7 @@ ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: mull %edx -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, (%esp) # 4-byte Spill ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: addl %eax, %edi @@ -951,14 +950,14 @@ ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl (%esp), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: addl %eax, %edi +; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, (%esp) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill @@ -1004,7 +1003,7 @@ ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: addl %esi, %edi ; X32-NEXT: adcl %ecx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl (%esp), %ecx # 4-byte Reload ; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill @@ -1017,27 +1016,26 @@ ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: mull %ebp -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, (%esp) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: addl (%esp), %ebx # 4-byte Folded 
Reload ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: mull %ebp +; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, (%esp) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %ebp +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx @@ -1050,48 +1048,46 @@ ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl %eax, (%esp) # 4-byte Folded Spill ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %esi ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: imull %eax, %ebp -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl %edi, %ecx +; X32-NEXT: imull %eax, %ecx +; X32-NEXT: mull %ebp ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: addl %ebp, %edx -; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT: addl %edx, %esi -; X32-NEXT: movl %esi, %ebp +; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: addl %ecx, %ebp +; X32-NEXT: addl %edx, %ebp ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %esi +; X32-NEXT: movl %eax, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: imull %esi, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: imull %ebx, %esi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %esi, %edx -; X32-NEXT: movl (%esp), %esi # 4-byte Reload -; X32-NEXT: movl %edi, %eax -; X32-NEXT: imull %edi, %esi -; X32-NEXT: addl %edx, %esi -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, %edi -; X32-NEXT: adcl %ebp, %esi -; X32-NEXT: movl %esi, (%esp) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: imull %ecx, %ebx +; X32-NEXT: addl %edx, %ebx ; X32-NEXT: mull %ecx +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %edx, %ebx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: adcl %ebp, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ebp ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %ecx +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: addl %ebp, %ebx ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ecx @@ -1106,59 +1102,57 @@ ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl (%esp), %edx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi ; X32-NEXT: movl 60(%edi), %ecx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: imull %eax, %ecx ; X32-NEXT: movl 56(%edi), %esi +; X32-NEXT: movl %eax, %ebp ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: addl %ecx, %edx ; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: addl %ecx, %esi ; X32-NEXT: addl %edx, %esi -; X32-NEXT: movl 48(%edi), %ebx -; X32-NEXT: movl 52(%edi), %ebp +; X32-NEXT: movl 48(%edi), %ecx +; X32-NEXT: movl 52(%edi), %ebx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %edi -; X32-NEXT: imull %ebp, %edi -; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: mull %ebx -; X32-NEXT: addl %edi, %edx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: imull %ebx, %ecx -; X32-NEXT: addl %edx, %ecx +; X32-NEXT: movl %eax, %edx +; X32-NEXT: imull %ebx, %edx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: imull %ecx, %edi +; X32-NEXT: addl %edx, %edi +; X32-NEXT: mull %ecx +; X32-NEXT: addl %edx, %edi ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %esi +; X32-NEXT: adcl %esi, %edi +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: mull %ebp +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: mull %ecx +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ebp -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %esi, %edi +; X32-NEXT: movl %eax, %esi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %ebp -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %edi, %ebx +; X32-NEXT: addl %esi, %ebx ; X32-NEXT: adcl %ebp, %ecx ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: adcl %edi, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: addl 
{{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload @@ -1166,45 +1160,44 @@ ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl (%esp), %ebx # 4-byte Folded Reload ; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: movl %esi, %ecx -; X32-NEXT: movl 40(%esi), %ebx +; X32-NEXT: movl 40(%esi), %ebp +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl %ebp, (%esp) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl 44(%esi), %ebx ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edx, %ebp -; X32-NEXT: movl 44(%ecx), %ecx -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: mull %esi +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ebp, %edi +; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: mull %ebp -; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %esi, %ebx +; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ebx, %edi +; X32-NEXT: addl %ecx, %edi ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %esi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl (%esp), %eax # 4-byte Reload ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -1214,81 +1207,80 @@ ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %edx -; X32-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: movl 32(%esi), %edi -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi +; X32-NEXT: movl 32(%edi), %esi +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 
-; X32-NEXT: movl 36(%esi), %esi -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl 36(%edi), %ebx +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebp ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl %edi, %eax +; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %ebx, %edi -; X32-NEXT: setb %bl -; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %ebp -; X32-NEXT: movl %eax, %esi -; X32-NEXT: addl %edi, %esi -; X32-NEXT: movzbl %bl, %eax -; X32-NEXT: adcl %eax, %ebp -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax -; X32-NEXT: xorl %ecx, %ecx +; X32-NEXT: adcl %edi, %esi +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl %ebx, %edi +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %esi, %ebx +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT: adcl %eax, %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: xorl %edx, %edx +; X32-NEXT: mull %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: addl %eax, %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: addl %eax, %esi ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %edx, %eax -; X32-NEXT: addl %esi, %ecx -; X32-NEXT: adcl %ebp, %eax -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: addl %ebx, %esi +; X32-NEXT: adcl %ecx, %eax +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill -; X32-NEXT: movl %edi, %eax +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl %ecx, %ebp +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %ecx, %ebx ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %edi -; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, %ebp -; X32-NEXT: adcl %esi, %edi -; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %ecx -; X32-NEXT: movl %ecx, %ebx -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NEXT: adcl %esi, %ecx +; X32-NEXT: setb %bl +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: addl %ecx, %eax +; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload @@ -1304,221 +1296,220 @@ ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl (%esp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl (%esp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl (%esp), %ebp # 4-byte Folded Reload +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %edi, %ebx ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: mull %ebx -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebp, %eax -; X32-NEXT: movl %eax, %ebp -; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb (%esp) # 1-byte Folded Spill -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %ebx -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl (%esp), %ecx # 1-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %edi +; X32-NEXT: addl %ebx, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %esi, %edi +; X32-NEXT: setb %bl +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: mull %ecx +; X32-NEXT: movl %ecx, %ebp +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: addl %eax, %edi +; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; 
X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X32-NEXT: adcl %eax, %edi -; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %eax, %esi +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl 48(%ecx), %ebp -; X32-NEXT: imull %ebp, %ebx -; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X32-NEXT: movl 48(%ebx), %esi +; X32-NEXT: movl %ebp, %ecx +; X32-NEXT: imull %esi, %ecx +; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: addl %ebx, %edx -; X32-NEXT: movl 52(%ecx), %eax +; X32-NEXT: movl 52(%ebx), %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: imull %eax, %edi +; X32-NEXT: addl %ecx, %edi ; X32-NEXT: addl %edx, %edi ; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl 56(%ecx), %eax -; X32-NEXT: movl %ecx, %ebx -; X32-NEXT: movl %eax, %edi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: imull %esi, %edi +; X32-NEXT: movl 56(%ebx), %eax +; X32-NEXT: movl %eax, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx +; X32-NEXT: imull %ecx, %edx +; X32-NEXT: movl 60(%ebx), %ebp +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: imull %ebx, %ebp +; X32-NEXT: addl %edx, %ebp +; X32-NEXT: mull %ebx +; X32-NEXT: addl %edx, %ebp +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: addl %edi, %edx -; X32-NEXT: movl 60(%ebx), %ebx -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: imull %ecx, %ebx -; X32-NEXT: addl %edx, %ebx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: mull %ebp +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %ebp -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT: adcl $0, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: mull %ebp -; X32-NEXT: movl %edx, %esi +; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %ecx, %esi -; X32-NEXT: setb %cl +; X32-NEXT: adcl %esi, %ecx +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: mull %ebp -; X32-NEXT: 
addl %esi, %eax -; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: mull %ebx +; X32-NEXT: addl %ecx, %eax +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl %ebx, %edx +; X32-NEXT: adcl %ebp, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NEXT: imull %ebp, %edi -; X32-NEXT: movl %ebp, %eax -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %eax, %esi -; X32-NEXT: addl %edi, %edx -; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: addl %edx, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: imull %eax, %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: imull %ebx, %ecx +; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: addl %edi, %esi +; X32-NEXT: addl %edx, %esi +; X32-NEXT: movl %esi, %ebx +; X32-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: imull %esi, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: imull %edi, %ebp +; X32-NEXT: addl %edx, %ebp ; X32-NEXT: mull %edi -; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: imull %edi, %ecx -; X32-NEXT: addl %edx, %ecx -; X32-NEXT: addl %esi, %eax +; X32-NEXT: addl %edx, %ebp +; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ebx, %ebp +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %ebp -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %edi, %ebx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %ebp +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %esi, %ecx +; X32-NEXT: movl %eax, %esi +; X32-NEXT: addl %ecx, %esi ; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: mull %ebp +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %esi, %ebx +; X32-NEXT: adcl %edi, %ecx +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: mull %ebx -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %ebp -; X32-NEXT: addl %ecx, %ebp -; X32-NEXT: adcl %edi, %esi -; X32-NEXT: setb %cl -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; 
X32-NEXT: mull %ebx -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: addl %esi, %eax -; X32-NEXT: movzbl %cl, %ecx -; X32-NEXT: adcl %ecx, %ebx +; X32-NEXT: mull %ebp +; X32-NEXT: addl %ecx, %eax +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload +; X32-NEXT: adcl %esi, %edx ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: 
adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, (%ecx) +; X32-NEXT: movl %edi, (%esi) ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, 4(%ecx) +; X32-NEXT: movl %edi, 4(%esi) ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, 8(%ecx) +; X32-NEXT: movl %edi, 8(%esi) ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, 12(%ecx) +; X32-NEXT: movl %edi, 12(%esi) ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, 16(%ecx) +; X32-NEXT: movl %edi, 16(%esi) ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, 20(%ecx) +; X32-NEXT: movl %edi, 20(%esi) ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, 24(%ecx) +; X32-NEXT: movl %edi, 24(%esi) ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, 28(%ecx) +; X32-NEXT: movl %edi, 28(%esi) ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, 32(%ecx) +; X32-NEXT: movl %edi, 32(%esi) ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, 36(%ecx) +; X32-NEXT: movl %edi, 36(%esi) ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: movl %edi, 40(%ecx) -; X32-NEXT: movl %esi, 44(%ecx) -; X32-NEXT: movl %edx, 48(%ecx) -; X32-NEXT: movl %ebp, 52(%ecx) -; X32-NEXT: movl %eax, 56(%ecx) -; X32-NEXT: movl %ebx, 60(%ecx) +; X32-NEXT: movl %edi, 40(%esi) +; X32-NEXT: movl %ecx, 44(%esi) +; X32-NEXT: movl %ebp, 48(%esi) +; X32-NEXT: movl %ebx, 52(%esi) +; X32-NEXT: movl %eax, 56(%esi) +; X32-NEXT: movl %edx, 60(%esi) ; X32-NEXT: addl $244, %esp ; X32-NEXT: popl %esi ; X32-NEXT: popl %edi @@ -1536,307 +1527,303 @@ ; X64-NEXT: pushq %rbx ; X64-NEXT: pushq %rax ; X64-NEXT: movq %rdx, (%rsp) # 8-byte Spill -; X64-NEXT: movq 24(%rdi), %r11 -; X64-NEXT: movq 16(%rdi), %r15 +; X64-NEXT: movq 24(%rdi), %r15 +; X64-NEXT: movq 16(%rdi), %r13 ; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq (%rsi), %rdx -; X64-NEXT: movq 8(%rsi), %rbp -; X64-NEXT: movq %r15, %rax -; X64-NEXT: movq %rdx, %rsi -; X64-NEXT: mulq %rdx -; X64-NEXT: movq %rdx, %r9 +; X64-NEXT: movq (%rsi), %r10 +; X64-NEXT: movq 8(%rsi), %r9 +; X64-NEXT: movq %r13, %rax +; X64-NEXT: mulq %r10 +; X64-NEXT: movq %rdx, %r11 ; X64-NEXT: movq %rax, %r8 -; X64-NEXT: movq %r11, %rax -; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: mulq %rsi -; X64-NEXT: movq %rsi, %r10 -; X64-NEXT: movq 
%rdx, %rbx -; X64-NEXT: movq %rax, %rsi -; X64-NEXT: addq %r9, %rsi -; X64-NEXT: adcq $0, %rbx ; X64-NEXT: movq %r15, %rax ; X64-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: mulq %rbp +; X64-NEXT: mulq %r10 +; X64-NEXT: movq %rdx, %rbp +; X64-NEXT: movq %rax, %rbx +; X64-NEXT: addq %r11, %rbx +; X64-NEXT: adcq $0, %rbp +; X64-NEXT: movq %r13, %rax +; X64-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %r9, %r11 +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %r9 -; X64-NEXT: addq %rsi, %r9 -; X64-NEXT: adcq %rbx, %rcx +; X64-NEXT: movq %rax, %r14 +; X64-NEXT: addq %rbx, %r14 +; X64-NEXT: adcq %rbp, %rcx ; X64-NEXT: setb %al -; X64-NEXT: movzbl %al, %ebx -; X64-NEXT: movq %r11, %rax -; X64-NEXT: mulq %rbp -; X64-NEXT: movq %rbp, %r14 -; X64-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rdx, %rsi -; X64-NEXT: movq %rax, %rbp -; X64-NEXT: addq %rcx, %rbp -; X64-NEXT: adcq %rbx, %rsi +; X64-NEXT: movzbl %al, %ebp +; X64-NEXT: movq %r15, %rax +; X64-NEXT: mulq %r9 +; X64-NEXT: movq %rdx, %rbx +; X64-NEXT: movq %rax, %rsi +; X64-NEXT: addq %rcx, %rsi +; X64-NEXT: adcq %rbp, %rbx ; X64-NEXT: xorl %ecx, %ecx -; X64-NEXT: movq %r10, %rbx ; X64-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %r10, %rax ; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rdx, %r13 -; X64-NEXT: movq %rax, %r10 -; X64-NEXT: movq %r15, %rax +; X64-NEXT: movq %rdx, %r15 +; X64-NEXT: movq %rax, %r12 +; X64-NEXT: movq %r13, %rax ; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rdx, %r13 ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rax, %r15 -; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: addq %r10, %r15 -; X64-NEXT: adcq %r13, %rdx -; X64-NEXT: addq %rbp, %r15 -; X64-NEXT: adcq %rsi, %rdx -; X64-NEXT: movq %rdx, %r12 -; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq %rbx ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rdx, %r11 -; X64-NEXT: movq 8(%rdi), %rdi -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: mulq %rbx -; X64-NEXT: movq %rdx, %rbp -; X64-NEXT: movq %rax, %rsi -; X64-NEXT: addq %r11, %rsi -; X64-NEXT: adcq $0, %rbp -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq %r14 -; X64-NEXT: movq %rdx, %rbx +; X64-NEXT: addq %r12, %rax +; X64-NEXT: adcq %r15, %r13 ; X64-NEXT: addq %rsi, %rax +; X64-NEXT: movq %rax, %rcx +; X64-NEXT: adcq %rbx, %r13 +; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq (%rdi), %rsi +; X64-NEXT: movq %rsi, %rax +; X64-NEXT: mulq %r10 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %rbp, %rbx -; X64-NEXT: setb %r11b -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: mulq %r14 +; X64-NEXT: movq %rdx, %r9 +; X64-NEXT: movq 8(%rdi), %rbx +; X64-NEXT: movq %rbx, %rax +; X64-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: mulq %r10 +; X64-NEXT: movq %rdx, %rdi +; X64-NEXT: movq %rax, %rbp +; X64-NEXT: addq %r9, %rbp +; X64-NEXT: adcq $0, %rdi +; X64-NEXT: movq %rsi, %rax +; X64-NEXT: movq %rsi, %r10 +; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rsi +; X64-NEXT: addq %rbp, %rax +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq %rdi, %rsi +; X64-NEXT: setb %r9b +; X64-NEXT: 
movq %rbx, %rax +; X64-NEXT: mulq %r11 +; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, %rbp -; X64-NEXT: addq %rbx, %rbp -; X64-NEXT: movzbl %r11b, %eax -; X64-NEXT: adcq %rax, %rsi -; X64-NEXT: movq %rcx, %rax +; X64-NEXT: addq %rsi, %rbp +; X64-NEXT: movzbl %r9b, %eax +; X64-NEXT: adcq %rax, %rdi +; X64-NEXT: movq %r10, %rbx +; X64-NEXT: movq %r10, %rax ; X64-NEXT: xorl %edx, %edx ; X64-NEXT: mulq %rdx -; X64-NEXT: movq %rdx, %r14 -; X64-NEXT: movq %rax, %r11 -; X64-NEXT: addq %rax, %r10 -; X64-NEXT: adcq %rdx, %r13 -; X64-NEXT: addq %rbp, %r10 -; X64-NEXT: adcq %rsi, %r13 -; X64-NEXT: addq %r8, %r10 -; X64-NEXT: adcq %r9, %r13 -; X64-NEXT: adcq $0, %r15 -; X64-NEXT: adcq $0, %r12 -; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %rdx, %r10 +; X64-NEXT: movq %rax, %r9 +; X64-NEXT: addq %rax, %r12 +; X64-NEXT: adcq %rdx, %r15 +; X64-NEXT: addq %rbp, %r12 +; X64-NEXT: adcq %rdi, %r15 +; X64-NEXT: addq %r8, %r12 +; X64-NEXT: adcq %r14, %r15 +; X64-NEXT: adcq $0, %rcx +; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq $0, %r13 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload ; X64-NEXT: movq 16(%rsi), %r8 -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %rcx, %r9 -; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %rbx, %rax +; X64-NEXT: movq %rbx, %rcx +; X64-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: mulq %r8 +; X64-NEXT: movq %rdx, %r11 +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload +; X64-NEXT: movq %rbx, %rax ; X64-NEXT: mulq %r8 ; X64-NEXT: movq %rdx, %rdi -; X64-NEXT: movq %rax, %r12 -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; X64-NEXT: movq %rax, %rbp +; X64-NEXT: addq %r11, %rbp +; X64-NEXT: adcq $0, %rdi +; X64-NEXT: movq 24(%rsi), %r11 ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq %r8 -; X64-NEXT: movq %rdx, %rbp -; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %rdi, %rbx -; X64-NEXT: adcq $0, %rbp -; X64-NEXT: movq 24(%rsi), %rdi -; X64-NEXT: movq %r9, %rax -; X64-NEXT: mulq %rdi +; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rsi -; X64-NEXT: addq %rbx, %rax +; X64-NEXT: addq %rbp, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %rbp, %rsi +; X64-NEXT: adcq %rdi, %rsi ; X64-NEXT: setb %bpl -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdx, %rbx -; X64-NEXT: movq %rax, %r9 -; X64-NEXT: addq %rsi, %r9 +; X64-NEXT: movq %rbx, %rax +; X64-NEXT: mulq %r11 +; X64-NEXT: movq %rdx, %rdi +; X64-NEXT: movq %rax, %r14 +; X64-NEXT: addq %rsi, %r14 ; X64-NEXT: movzbl %bpl, %eax -; X64-NEXT: adcq %rax, %rbx +; X64-NEXT: adcq %rax, %rdi ; X64-NEXT: movq %r8, %rax ; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rax, %rbp -; X64-NEXT: addq %rax, %r11 -; X64-NEXT: adcq %rdx, %r14 -; X64-NEXT: addq %r9, %r11 -; X64-NEXT: adcq %rbx, %r14 -; X64-NEXT: addq %r10, %r12 -; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; X64-NEXT: adcq $0, %r11 -; X64-NEXT: adcq $0, %r14 -; X64-NEXT: addq %r15, %r11 -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload -; X64-NEXT: setb %r9b -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; X64-NEXT: movq %rcx, %rax +; X64-NEXT: addq %rax, %r9 +; X64-NEXT: adcq %rdx, 
%r10 +; X64-NEXT: addq %r14, %r9 +; X64-NEXT: adcq %rdi, %r10 +; X64-NEXT: addq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; X64-NEXT: adcq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; X64-NEXT: adcq $0, %r9 +; X64-NEXT: adcq $0, %r10 +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload +; X64-NEXT: adcq %r13, %r10 +; X64-NEXT: setb %r14b +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload +; X64-NEXT: movq %rbx, %rax ; X64-NEXT: mulq %r8 -; X64-NEXT: movq %rdx, %r10 -; X64-NEXT: movq %rax, %r12 -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; X64-NEXT: movq %r15, %rax +; X64-NEXT: movq %rdx, %rcx +; X64-NEXT: movq %rax, %r15 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload +; X64-NEXT: movq %r12, %rax ; X64-NEXT: mulq %r8 ; X64-NEXT: movq %rdx, %rsi -; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %r10, %rbx +; X64-NEXT: movq %rax, %rdi +; X64-NEXT: addq %rcx, %rdi ; X64-NEXT: adcq $0, %rsi -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq %rdi +; X64-NEXT: movq %rbx, %rax +; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: addq %rbx, %rax -; X64-NEXT: movq %rax, %rbx +; X64-NEXT: addq %rdi, %rax +; X64-NEXT: movq %rax, %rdi ; X64-NEXT: adcq %rsi, %rcx -; X64-NEXT: setb %sil -; X64-NEXT: movq %r15, %rax -; X64-NEXT: mulq %rdi +; X64-NEXT: setb %bl +; X64-NEXT: movq %r12, %rax +; X64-NEXT: mulq %r11 ; X64-NEXT: addq %rcx, %rax -; X64-NEXT: movzbl %sil, %ecx +; X64-NEXT: movzbl %bl, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload -; X64-NEXT: addq %rbp, %rsi ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload -; X64-NEXT: addq %rax, %rsi -; X64-NEXT: adcq %rdx, %rcx -; X64-NEXT: addq %r11, %r12 -; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq %r14, %rbx -; X64-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movzbl %r9b, %eax -; X64-NEXT: adcq %rax, %rsi -; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: adcq $0, %rcx +; X64-NEXT: addq %rbp, %rcx +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload +; X64-NEXT: addq %rax, %rcx +; X64-NEXT: adcq %rdx, %rsi +; X64-NEXT: addq %r9, %r15 +; X64-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq %r10, %rdi +; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movzbl %r14b, %eax +; X64-NEXT: adcq %rax, %rcx ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; X64-NEXT: movq 32(%rcx), %rsi -; X64-NEXT: imulq %rsi, %rdi -; X64-NEXT: movq %rsi, %rax +; X64-NEXT: adcq $0, %rsi +; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; X64-NEXT: movq 32(%rsi), %rcx +; X64-NEXT: imulq %rcx, %r11 +; X64-NEXT: movq %rcx, %rax ; X64-NEXT: mulq %r8 -; X64-NEXT: movq %rax, %r10 -; X64-NEXT: addq %rdi, %rdx -; X64-NEXT: movq 40(%rcx), %r9 -; X64-NEXT: imulq %r9, %r8 +; X64-NEXT: movq %rax, %r9 +; X64-NEXT: movq 40(%rsi), %r10 +; X64-NEXT: imulq %r10, %r8 +; X64-NEXT: addq %r11, %r8 ; X64-NEXT: addq %rdx, %r8 -; X64-NEXT: movq 48(%rcx), %rax -; X64-NEXT: movq %rcx, %rbx -; X64-NEXT: movq %rax, %rdi -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; X64-NEXT: imulq 
%rcx, %rdi +; X64-NEXT: movq 48(%rsi), %rax +; X64-NEXT: movq %rax, %rdx +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload +; X64-NEXT: imulq %r11, %rdx +; X64-NEXT: movq 56(%rsi), %rbx ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload -; X64-NEXT: mulq %rbp -; X64-NEXT: movq %rax, %r14 -; X64-NEXT: addq %rdi, %rdx -; X64-NEXT: movq 56(%rbx), %rbx ; X64-NEXT: imulq %rbp, %rbx ; X64-NEXT: addq %rdx, %rbx -; X64-NEXT: addq %r10, %r14 +; X64-NEXT: mulq %rbp +; X64-NEXT: movq %rax, %rdi +; X64-NEXT: addq %rdx, %rbx +; X64-NEXT: addq %r9, %rdi ; X64-NEXT: adcq %r8, %rbx ; X64-NEXT: movq %rbp, %rax -; X64-NEXT: movq %rbp, %r10 -; X64-NEXT: mulq %rsi -; X64-NEXT: movq %rdx, %rdi +; X64-NEXT: movq %rbp, %r8 +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rdx, %r9 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %rcx, %r8 -; X64-NEXT: mulq %rsi -; X64-NEXT: movq %rdx, %rsi +; X64-NEXT: movq %r11, %rax +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %rbp -; X64-NEXT: addq %rdi, %rbp -; X64-NEXT: adcq $0, %rsi -; X64-NEXT: movq %r10, %rax -; X64-NEXT: mulq %r9 -; X64-NEXT: movq %rdx, %rdi +; X64-NEXT: addq %r9, %rbp +; X64-NEXT: adcq $0, %rcx +; X64-NEXT: movq %r8, %rax +; X64-NEXT: mulq %r10 +; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %r13 ; X64-NEXT: addq %rbp, %r13 -; X64-NEXT: adcq %rsi, %rdi +; X64-NEXT: adcq %rcx, %rsi ; X64-NEXT: setb %cl -; X64-NEXT: movq %r8, %rax -; X64-NEXT: mulq %r9 -; X64-NEXT: movq %rdx, %r11 -; X64-NEXT: movq %rax, %r9 -; X64-NEXT: addq %rdi, %r9 +; X64-NEXT: movq %r11, %rax +; X64-NEXT: mulq %r10 +; X64-NEXT: movq %rdx, %r10 +; X64-NEXT: movq %rax, %r11 +; X64-NEXT: addq %rsi, %r11 ; X64-NEXT: movzbl %cl, %eax -; X64-NEXT: adcq %rax, %r11 -; X64-NEXT: addq %r14, %r9 -; X64-NEXT: adcq %rbx, %r11 -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload -; X64-NEXT: movq 56(%rbp), %rcx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload -; X64-NEXT: imulq %r10, %rcx -; X64-NEXT: movq 48(%rbp), %rbx -; X64-NEXT: movq %r10, %rax -; X64-NEXT: mulq %rbx +; X64-NEXT: adcq %rax, %r10 +; X64-NEXT: addq %rdi, %r11 +; X64-NEXT: adcq %rbx, %r10 +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload +; X64-NEXT: movq 56(%rbx), %rcx +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload +; X64-NEXT: imulq %r9, %rcx +; X64-NEXT: movq 48(%rbx), %rdi +; X64-NEXT: movq %r9, %rax +; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rax, %rsi -; X64-NEXT: addq %rcx, %rdx ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload -; X64-NEXT: imulq %r15, %rbx -; X64-NEXT: addq %rdx, %rbx -; X64-NEXT: movq 32(%rbp), %rdi -; X64-NEXT: movq 40(%rbp), %r8 +; X64-NEXT: imulq %r15, %rdi +; X64-NEXT: addq %rcx, %rdi +; X64-NEXT: addq %rdx, %rdi +; X64-NEXT: movq 32(%rbx), %rbp +; X64-NEXT: movq 40(%rbx), %r12 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; X64-NEXT: movq %rax, %rcx -; X64-NEXT: imulq %r8, %rcx -; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rax, %r14 -; X64-NEXT: addq %rcx, %rdx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; X64-NEXT: imulq %rdi, %rax -; X64-NEXT: addq %rdx, %rax -; X64-NEXT: addq %rsi, %r14 -; X64-NEXT: adcq %rbx, %rax -; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: mulq %r10 -; X64-NEXT: movq %rdx, %r12 +; X64-NEXT: movq %rax, %rdx +; X64-NEXT: imulq %r12, %rdx +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte 
Reload +; X64-NEXT: imulq %rbp, %rcx +; X64-NEXT: addq %rdx, %rcx +; X64-NEXT: mulq %rbp +; X64-NEXT: movq %rax, %r8 +; X64-NEXT: addq %rdx, %rcx +; X64-NEXT: addq %rsi, %r8 +; X64-NEXT: adcq %rdi, %rcx +; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %rbp, %rax +; X64-NEXT: mulq %r9 +; X64-NEXT: movq %rdx, %r14 ; X64-NEXT: movq %rax, %rsi -; X64-NEXT: movq %r8, %rax -; X64-NEXT: mulq %r10 +; X64-NEXT: movq %r12, %rax +; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %rbx -; X64-NEXT: addq %r12, %rbx +; X64-NEXT: movq %rax, %rdi +; X64-NEXT: addq %r14, %rdi ; X64-NEXT: adcq $0, %rcx -; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rbp, %rax ; X64-NEXT: mulq %r15 -; X64-NEXT: movq %rdx, %rbp -; X64-NEXT: movq %rax, %rdi -; X64-NEXT: addq %rbx, %rdi -; X64-NEXT: adcq %rcx, %rbp +; X64-NEXT: movq %rdx, %rbx +; X64-NEXT: movq %rax, %rbp +; X64-NEXT: addq %rdi, %rbp +; X64-NEXT: adcq %rcx, %rbx ; X64-NEXT: setb %cl -; X64-NEXT: movq %r8, %rax +; X64-NEXT: movq %r12, %rax ; X64-NEXT: mulq %r15 -; X64-NEXT: addq %rbp, %rax +; X64-NEXT: addq %rbx, %rax ; X64-NEXT: movzbl %cl, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: addq %r14, %rax +; X64-NEXT: addq %r8, %rax ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload ; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload -; X64-NEXT: adcq %r13, %rdi -; X64-NEXT: adcq %r9, %rax -; X64-NEXT: adcq %r11, %rdx +; X64-NEXT: adcq %r13, %rbp +; X64-NEXT: adcq %r11, %rax +; X64-NEXT: adcq %r10, %rdx ; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload -; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload ; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload ; X64-NEXT: movq (%rsp), %rcx # 8-byte Reload -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload -; X64-NEXT: movq %rbp, (%rcx) -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload -; X64-NEXT: movq %rbp, 8(%rcx) -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload -; X64-NEXT: movq %rbp, 16(%rcx) -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload -; X64-NEXT: movq %rbp, 24(%rcx) +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload +; X64-NEXT: movq %rdi, (%rcx) +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload +; X64-NEXT: movq %rdi, 8(%rcx) +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload +; X64-NEXT: movq %rdi, 16(%rcx) +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload +; X64-NEXT: movq %rdi, 24(%rcx) ; X64-NEXT: movq %rsi, 32(%rcx) -; X64-NEXT: movq %rdi, 40(%rcx) +; X64-NEXT: movq %rbp, 40(%rcx) ; X64-NEXT: movq %rax, 48(%rcx) ; X64-NEXT: movq %rdx, 56(%rcx) ; X64-NEXT: addq $8, %rsp Index: test/CodeGen/X86/mul128.ll =================================================================== --- test/CodeGen/X86/mul128.ll +++ test/CodeGen/X86/mul128.ll @@ -9,8 +9,8 @@ ; X64-NEXT: movq %rdi, %rax ; X64-NEXT: imulq %rdi, %rcx ; X64-NEXT: mulq %rdx -; X64-NEXT: addq %rcx, %rdx ; X64-NEXT: imulq %r8, %rsi +; X64-NEXT: addq %rcx, %rsi ; X64-NEXT: addq %rsi, %rdx ; X64-NEXT: retq ; @@ -30,46 +30,46 @@ ; X86-NEXT: .cfi_offset %edi, -16 ; X86-NEXT: .cfi_offset %ebx, -12 ; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; 
X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: imull %edx, %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: imull %edx, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: imull %edi, %ecx +; X86-NEXT: addl %eax, %ecx ; X86-NEXT: movl %edi, %eax ; X86-NEXT: mull %edx ; X86-NEXT: movl %eax, %ebx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: imull %edi, %ecx ; X86-NEXT: addl %edx, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: addl %esi, %ecx -; X86-NEXT: movl %eax, %esi -; X86-NEXT: imull {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, %eax +; X86-NEXT: movl %esi, %edx +; X86-NEXT: imull {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: imull %ebp, %esi +; X86-NEXT: addl %edx, %esi ; X86-NEXT: mull %ebp -; X86-NEXT: addl %esi, %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: imull %ebp, %edi -; X86-NEXT: addl %edx, %edi +; X86-NEXT: addl %edx, %esi ; X86-NEXT: addl %ebx, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl %ecx, %edi +; X86-NEXT: adcl %ecx, %esi ; X86-NEXT: movl %ebp, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: mull %ecx +; X86-NEXT: mull %edi ; X86-NEXT: movl %edx, %ebx ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull %ecx -; X86-NEXT: movl %edx, %esi +; X86-NEXT: mull %edi +; X86-NEXT: movl %edx, %edi ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: addl %ebx, %ecx -; X86-NEXT: adcl $0, %esi +; X86-NEXT: adcl $0, %edi ; X86-NEXT: movl %ebp, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) ; X86-NEXT: movl %edx, %ebx ; X86-NEXT: movl %eax, %ebp ; X86-NEXT: addl %ecx, %ebp -; X86-NEXT: adcl %esi, %ebx +; X86-NEXT: adcl %edi, %ebx ; X86-NEXT: setb %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) @@ -77,7 +77,7 @@ ; X86-NEXT: movzbl %cl, %ecx ; X86-NEXT: adcl %ecx, %edx ; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: adcl %edi, %edx +; X86-NEXT: adcl %esi, %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl (%esp), %esi # 4-byte Reload ; X86-NEXT: movl %esi, (%ecx) @@ -107,14 +107,15 @@ ; X64-LABEL: PR13897: ; X64: # %bb.0: # %"0x0" ; X64-NEXT: movl {{.*}}(%rip), %ecx +; X64-NEXT: movq %rcx, %rsi +; X64-NEXT: shlq $32, %rsi ; X64-NEXT: movabsq $4294967297, %rdx # imm = 0x100000001 ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: mulq %rdx -; X64-NEXT: addq %rcx, %rdx -; X64-NEXT: shlq $32, %rcx -; X64-NEXT: addq %rcx, %rdx +; X64-NEXT: addq %rcx, %rsi +; X64-NEXT: addq %rdx, %rsi ; X64-NEXT: movq %rax, {{.*}}(%rip) -; X64-NEXT: movq %rdx, aaa+{{.*}}(%rip) +; X64-NEXT: movq %rsi, aaa+{{.*}}(%rip) ; X64-NEXT: retq ; ; X86-LABEL: PR13897: Index: test/CodeGen/X86/mul64.ll =================================================================== --- test/CodeGen/X86/mul64.ll +++ test/CodeGen/X86/mul64.ll @@ -11,8 +11,8 @@ ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %esi ; X32-NEXT: imull {{[0-9]+}}(%esp), %ecx -; X32-NEXT: addl %ecx, %edx ; X32-NEXT: imull {{[0-9]+}}(%esp), %esi +; X32-NEXT: addl %ecx, %esi ; X32-NEXT: addl %esi, %edx ; X32-NEXT: popl %esi ; X32-NEXT: retl Index: test/CodeGen/X86/popcnt.ll =================================================================== --- test/CodeGen/X86/popcnt.ll +++ test/CodeGen/X86/popcnt.ll @@ -350,23 +350,23 @@ ; X32-NOSSE-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F ; X32-NOSSE-NEXT: imull $16843009, %edi, %edx # imm = 0x1010101 ; X32-NOSSE-NEXT: shrl $24, %edx -; 
X32-NOSSE-NEXT: movl %ecx, %edi -; X32-NOSSE-NEXT: shrl %edi -; X32-NOSSE-NEXT: andl $1431655765, %edi # imm = 0x55555555 -; X32-NOSSE-NEXT: subl %edi, %ecx -; X32-NOSSE-NEXT: movl %ecx, %edi -; X32-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333 +; X32-NOSSE-NEXT: addl %esi, %edx +; X32-NOSSE-NEXT: movl %ecx, %esi +; X32-NOSSE-NEXT: shrl %esi +; X32-NOSSE-NEXT: andl $1431655765, %esi # imm = 0x55555555 +; X32-NOSSE-NEXT: subl %esi, %ecx +; X32-NOSSE-NEXT: movl %ecx, %esi +; X32-NOSSE-NEXT: andl $858993459, %esi # imm = 0x33333333 ; X32-NOSSE-NEXT: shrl $2, %ecx ; X32-NOSSE-NEXT: andl $858993459, %ecx # imm = 0x33333333 -; X32-NOSSE-NEXT: addl %edi, %ecx -; X32-NOSSE-NEXT: movl %ecx, %edi -; X32-NOSSE-NEXT: shrl $4, %edi -; X32-NOSSE-NEXT: addl %ecx, %edi -; X32-NOSSE-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F -; X32-NOSSE-NEXT: imull $16843009, %edi, %ecx # imm = 0x1010101 +; X32-NOSSE-NEXT: addl %esi, %ecx +; X32-NOSSE-NEXT: movl %ecx, %esi +; X32-NOSSE-NEXT: shrl $4, %esi +; X32-NOSSE-NEXT: addl %ecx, %esi +; X32-NOSSE-NEXT: andl $252645135, %esi # imm = 0xF0F0F0F +; X32-NOSSE-NEXT: imull $16843009, %esi, %ecx # imm = 0x1010101 ; X32-NOSSE-NEXT: shrl $24, %ecx ; X32-NOSSE-NEXT: addl %edx, %ecx -; X32-NOSSE-NEXT: addl %esi, %ecx ; X32-NOSSE-NEXT: movl %ecx, (%eax) ; X32-NOSSE-NEXT: movl $0, 12(%eax) ; X32-NOSSE-NEXT: movl $0, 8(%eax) @@ -418,20 +418,18 @@ ; ; X32-POPCNT-LABEL: cnt128: ; X32-POPCNT: # %bb.0: -; X32-POPCNT-NEXT: pushl %esi ; X32-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx ; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx ; X32-POPCNT-NEXT: addl %ecx, %edx ; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx -; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %esi -; X32-POPCNT-NEXT: addl %ecx, %esi -; X32-POPCNT-NEXT: addl %edx, %esi -; X32-POPCNT-NEXT: movl %esi, (%eax) +; X32-POPCNT-NEXT: addl %edx, %ecx +; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx +; X32-POPCNT-NEXT: addl %ecx, %edx +; X32-POPCNT-NEXT: movl %edx, (%eax) ; X32-POPCNT-NEXT: movl $0, 12(%eax) ; X32-POPCNT-NEXT: movl $0, 8(%eax) ; X32-POPCNT-NEXT: movl $0, 4(%eax) -; X32-POPCNT-NEXT: popl %esi ; X32-POPCNT-NEXT: retl $4 ; ; X64-POPCNT-LABEL: cnt128: @@ -792,83 +790,80 @@ define i128 @cnt128_optsize(i128 %x) nounwind readnone optsize { ; X32-NOSSE-LABEL: cnt128_optsize: ; X32-NOSSE: # %bb.0: -; X32-NOSSE-NEXT: pushl %ebp ; X32-NOSSE-NEXT: pushl %ebx ; X32-NOSSE-NEXT: pushl %edi ; X32-NOSSE-NEXT: pushl %esi ; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NOSSE-NEXT: movl %ebx, %ecx -; X32-NOSSE-NEXT: shrl %ecx -; X32-NOSSE-NEXT: movl $1431655765, %edi # imm = 0x55555555 -; X32-NOSSE-NEXT: andl %edi, %ecx -; X32-NOSSE-NEXT: movl $1431655765, %edi # imm = 0x55555555 -; X32-NOSSE-NEXT: subl %ecx, %ebx -; X32-NOSSE-NEXT: movl $858993459, %ecx # imm = 0x33333333 -; X32-NOSSE-NEXT: movl %ebx, %ebp -; X32-NOSSE-NEXT: andl %ecx, %ebp -; X32-NOSSE-NEXT: shrl $2, %ebx -; X32-NOSSE-NEXT: andl %ecx, %ebx -; X32-NOSSE-NEXT: addl %ebp, %ebx -; X32-NOSSE-NEXT: movl %ebx, %ebp -; X32-NOSSE-NEXT: shrl $4, %ebp -; X32-NOSSE-NEXT: addl %ebx, %ebp -; X32-NOSSE-NEXT: movl %eax, %ebx -; X32-NOSSE-NEXT: shrl %ebx -; X32-NOSSE-NEXT: andl %edi, %ebx -; X32-NOSSE-NEXT: subl %ebx, %eax -; X32-NOSSE-NEXT: movl %eax, %ebx -; X32-NOSSE-NEXT: andl %ecx, %ebx -; X32-NOSSE-NEXT: shrl $2, %eax +; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edi +; 
X32-NOSSE-NEXT: movl %edi, %eax +; X32-NOSSE-NEXT: shrl %eax +; X32-NOSSE-NEXT: movl $1431655765, %ecx # imm = 0x55555555 ; X32-NOSSE-NEXT: andl %ecx, %eax -; X32-NOSSE-NEXT: addl %ebx, %eax -; X32-NOSSE-NEXT: movl %eax, %edi +; X32-NOSSE-NEXT: subl %eax, %edi +; X32-NOSSE-NEXT: movl $858993459, %eax # imm = 0x33333333 +; X32-NOSSE-NEXT: movl %edi, %ebx +; X32-NOSSE-NEXT: andl %eax, %ebx +; X32-NOSSE-NEXT: shrl $2, %edi +; X32-NOSSE-NEXT: andl %eax, %edi +; X32-NOSSE-NEXT: addl %ebx, %edi +; X32-NOSSE-NEXT: movl %edi, %ebx +; X32-NOSSE-NEXT: shrl $4, %ebx +; X32-NOSSE-NEXT: addl %edi, %ebx +; X32-NOSSE-NEXT: movl %esi, %edi +; X32-NOSSE-NEXT: shrl %edi +; X32-NOSSE-NEXT: andl %ecx, %edi +; X32-NOSSE-NEXT: subl %edi, %esi +; X32-NOSSE-NEXT: movl %esi, %edi +; X32-NOSSE-NEXT: andl %eax, %edi +; X32-NOSSE-NEXT: shrl $2, %esi +; X32-NOSSE-NEXT: andl %eax, %esi +; X32-NOSSE-NEXT: addl %edi, %esi +; X32-NOSSE-NEXT: movl %esi, %edi ; X32-NOSSE-NEXT: shrl $4, %edi -; X32-NOSSE-NEXT: addl %eax, %edi -; X32-NOSSE-NEXT: movl $252645135, %ebx # imm = 0xF0F0F0F -; X32-NOSSE-NEXT: andl %ebx, %ebp -; X32-NOSSE-NEXT: imull $16843009, %ebp, %eax # imm = 0x1010101 -; X32-NOSSE-NEXT: shrl $24, %eax -; X32-NOSSE-NEXT: andl %ebx, %edi +; X32-NOSSE-NEXT: addl %esi, %edi +; X32-NOSSE-NEXT: movl $252645135, %esi # imm = 0xF0F0F0F +; X32-NOSSE-NEXT: andl %esi, %ebx +; X32-NOSSE-NEXT: imull $16843009, %ebx, %ebx # imm = 0x1010101 +; X32-NOSSE-NEXT: shrl $24, %ebx +; X32-NOSSE-NEXT: andl %esi, %edi ; X32-NOSSE-NEXT: imull $16843009, %edi, %edi # imm = 0x1010101 ; X32-NOSSE-NEXT: shrl $24, %edi -; X32-NOSSE-NEXT: addl %eax, %edi -; X32-NOSSE-NEXT: movl %esi, %eax -; X32-NOSSE-NEXT: shrl %eax -; X32-NOSSE-NEXT: movl $1431655765, %ebp # imm = 0x55555555 -; X32-NOSSE-NEXT: andl %ebp, %eax -; X32-NOSSE-NEXT: subl %eax, %esi -; X32-NOSSE-NEXT: movl %esi, %eax -; X32-NOSSE-NEXT: andl %ecx, %eax -; X32-NOSSE-NEXT: shrl $2, %esi -; X32-NOSSE-NEXT: andl %ecx, %esi -; X32-NOSSE-NEXT: addl %eax, %esi -; X32-NOSSE-NEXT: movl %esi, %eax -; X32-NOSSE-NEXT: shrl $4, %eax -; X32-NOSSE-NEXT: addl %esi, %eax -; X32-NOSSE-NEXT: movl %edx, %esi -; X32-NOSSE-NEXT: shrl %esi -; X32-NOSSE-NEXT: andl %ebp, %esi -; X32-NOSSE-NEXT: subl %esi, %edx -; X32-NOSSE-NEXT: movl %edx, %esi -; X32-NOSSE-NEXT: andl %ecx, %esi +; X32-NOSSE-NEXT: addl %ebx, %edi +; X32-NOSSE-NEXT: movl %edx, %ebx +; X32-NOSSE-NEXT: shrl %ebx +; X32-NOSSE-NEXT: andl %ecx, %ebx +; X32-NOSSE-NEXT: subl %ebx, %edx +; X32-NOSSE-NEXT: movl %edx, %ebx +; X32-NOSSE-NEXT: andl %eax, %ebx ; X32-NOSSE-NEXT: shrl $2, %edx -; X32-NOSSE-NEXT: andl %ecx, %edx -; X32-NOSSE-NEXT: addl %esi, %edx +; X32-NOSSE-NEXT: andl %eax, %edx +; X32-NOSSE-NEXT: addl %ebx, %edx +; X32-NOSSE-NEXT: movl %edx, %ebx +; X32-NOSSE-NEXT: shrl $4, %ebx +; X32-NOSSE-NEXT: addl %edx, %ebx +; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NOSSE-NEXT: andl %esi, %ebx +; X32-NOSSE-NEXT: imull $16843009, %ebx, %ebx # imm = 0x1010101 +; X32-NOSSE-NEXT: shrl $24, %ebx +; X32-NOSSE-NEXT: addl %edi, %ebx +; X32-NOSSE-NEXT: movl %edx, %edi +; X32-NOSSE-NEXT: shrl %edi +; X32-NOSSE-NEXT: andl %ecx, %edi +; X32-NOSSE-NEXT: subl %edi, %edx +; X32-NOSSE-NEXT: movl %edx, %ecx +; X32-NOSSE-NEXT: andl %eax, %ecx +; X32-NOSSE-NEXT: shrl $2, %edx +; X32-NOSSE-NEXT: andl %eax, %edx +; X32-NOSSE-NEXT: addl %ecx, %edx ; X32-NOSSE-NEXT: movl %edx, %ecx ; X32-NOSSE-NEXT: shrl $4, %ecx ; X32-NOSSE-NEXT: addl %edx, %ecx -; X32-NOSSE-NEXT: andl %ebx, %eax -; X32-NOSSE-NEXT: andl %ebx, %ecx -; X32-NOSSE-NEXT: imull $16843009, %eax, 
%eax # imm = 0x1010101 -; X32-NOSSE-NEXT: shrl $24, %eax +; X32-NOSSE-NEXT: andl %esi, %ecx +; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NOSSE-NEXT: imull $16843009, %ecx, %ecx # imm = 0x1010101 ; X32-NOSSE-NEXT: shrl $24, %ecx -; X32-NOSSE-NEXT: addl %eax, %ecx -; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NOSSE-NEXT: addl %edi, %ecx +; X32-NOSSE-NEXT: addl %ebx, %ecx ; X32-NOSSE-NEXT: xorl %edx, %edx ; X32-NOSSE-NEXT: movl %edx, 12(%eax) ; X32-NOSSE-NEXT: movl %edx, 8(%eax) @@ -877,7 +872,6 @@ ; X32-NOSSE-NEXT: popl %esi ; X32-NOSSE-NEXT: popl %edi ; X32-NOSSE-NEXT: popl %ebx -; X32-NOSSE-NEXT: popl %ebp ; X32-NOSSE-NEXT: retl $4 ; ; X64-LABEL: cnt128_optsize: @@ -922,21 +916,19 @@ ; ; X32-POPCNT-LABEL: cnt128_optsize: ; X32-POPCNT: # %bb.0: -; X32-POPCNT-NEXT: pushl %esi ; X32-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx ; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx ; X32-POPCNT-NEXT: addl %ecx, %edx ; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx -; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %esi -; X32-POPCNT-NEXT: addl %ecx, %esi -; X32-POPCNT-NEXT: addl %edx, %esi +; X32-POPCNT-NEXT: addl %edx, %ecx +; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx +; X32-POPCNT-NEXT: addl %ecx, %edx ; X32-POPCNT-NEXT: xorl %ecx, %ecx ; X32-POPCNT-NEXT: movl %ecx, 12(%eax) ; X32-POPCNT-NEXT: movl %ecx, 8(%eax) ; X32-POPCNT-NEXT: movl %ecx, 4(%eax) -; X32-POPCNT-NEXT: movl %esi, (%eax) -; X32-POPCNT-NEXT: popl %esi +; X32-POPCNT-NEXT: movl %edx, (%eax) ; X32-POPCNT-NEXT: retl $4 ; ; X64-POPCNT-LABEL: cnt128_optsize: Index: test/CodeGen/X86/pr34080-2.ll =================================================================== --- test/CodeGen/X86/pr34080-2.ll +++ test/CodeGen/X86/pr34080-2.ll @@ -31,10 +31,6 @@ ; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: imull %edx ; CHECK-NEXT: movl %edx, %edi -; CHECK-NEXT: movl %edx, %eax -; CHECK-NEXT: shrl $31, %eax -; CHECK-NEXT: sarl $7, %edi -; CHECK-NEXT: addl %eax, %edi ; CHECK-NEXT: imull $36525, %esi, %eax # imm = 0x8EAD ; CHECK-NEXT: addl $172251900, %eax # imm = 0xA445AFC ; CHECK-NEXT: movl $1374389535, %edx # imm = 0x51EB851F @@ -43,7 +39,11 @@ ; CHECK-NEXT: shrl $31, %eax ; CHECK-NEXT: sarl $5, %edx ; CHECK-NEXT: addl %eax, %edx +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shrl $31, %eax ; CHECK-NEXT: addl 16(%ebx), %ecx +; CHECK-NEXT: addl %eax, %ecx +; CHECK-NEXT: sarl $7, %edi ; CHECK-NEXT: addl %edi, %ecx ; CHECK-NEXT: leal 257(%ecx,%edx), %eax ; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) Index: test/CodeGen/X86/pr36865.ll =================================================================== --- test/CodeGen/X86/pr36865.ll +++ test/CodeGen/X86/pr36865.ll @@ -13,10 +13,10 @@ ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax ; CHECK-NEXT: movl (%rax), %ecx ; CHECK-NEXT: addl 0, %eax -; CHECK-NEXT: addl %ecx, %eax -; CHECK-NEXT: addl %ecx, %eax -; CHECK-NEXT: addl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: movl %eax, {{[0-9]+}}(%rsp) +; CHECK-NEXT: addl %ecx, %ecx +; CHECK-NEXT: addl %eax, %ecx +; CHECK-NEXT: addl {{[0-9]+}}(%rsp), %ecx +; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%rsp) ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax ; CHECK-NEXT: movl %eax, %ecx ; CHECK-NEXT: imull %eax, %ecx Index: test/CodeGen/X86/smul_fix.ll =================================================================== --- test/CodeGen/X86/smul_fix.ll +++ test/CodeGen/X86/smul_fix.ll @@ -231,8 +231,8 @@ ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: mull %esi ; X86-NEXT: imull {{[0-9]+}}(%esp), %ecx -; X86-NEXT: addl %ecx, %edx ; X86-NEXT: imull 
{{[0-9]+}}(%esp), %esi +; X86-NEXT: addl %ecx, %esi ; X86-NEXT: addl %esi, %edx ; X86-NEXT: popl %esi ; X86-NEXT: .cfi_def_cfa_offset 4 Index: test/CodeGen/X86/statepoint-live-in.ll =================================================================== --- test/CodeGen/X86/statepoint-live-in.ll +++ test/CodeGen/X86/statepoint-live-in.ll @@ -442,11 +442,11 @@ ; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax ; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: movl %edi, %ebx -; CHECK-NEXT: movl %esi, %r15d -; CHECK-NEXT: movl %edx, %r12d -; CHECK-NEXT: movl %ecx, %r13d -; CHECK-NEXT: movl %r8d, %ebp +; CHECK-NEXT: movl %edi, %ebp +; CHECK-NEXT: movl %esi, %ebx +; CHECK-NEXT: movl %edx, %r15d +; CHECK-NEXT: movl %ecx, %r12d +; CHECK-NEXT: movl %r8d, %r13d ; CHECK-NEXT: movl %r9d, %r14d ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax ; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill @@ -475,7 +475,6 @@ ; CHECK-NEXT: addq %r15, %rbx ; CHECK-NEXT: addq %r12, %rbx ; CHECK-NEXT: addq %r13, %rbx -; CHECK-NEXT: addq %rbp, %rbx ; CHECK-NEXT: addq %r14, %rbx ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax ; CHECK-NEXT: addq %rax, %rbx @@ -517,6 +516,7 @@ ; CHECK-NEXT: addq %rax, %rbx ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax ; CHECK-NEXT: addq %rax, %rbx +; CHECK-NEXT: addq %rbp, %rbx ; CHECK-NEXT: movq %rbx, %rax ; CHECK-NEXT: addq $168, %rsp ; CHECK-NEXT: popq %rbx Index: test/CodeGen/X86/swift-return.ll =================================================================== --- test/CodeGen/X86/swift-return.ll +++ test/CodeGen/X86/swift-return.ll @@ -149,9 +149,9 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: movl %edi, {{[0-9]+}}(%rsp) ; CHECK-NEXT: callq gen3 -; CHECK-NEXT: addl %edx, %eax +; CHECK-NEXT: addl %edx, %ecx +; CHECK-NEXT: addl %r8d, %ecx ; CHECK-NEXT: addl %ecx, %eax -; CHECK-NEXT: addl %r8d, %eax ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq @@ -360,9 +360,9 @@ ; CHECK-NEXT: addsd %xmm1, %xmm0 ; CHECK-NEXT: addsd %xmm2, %xmm0 ; CHECK-NEXT: addsd %xmm3, %xmm0 -; CHECK-NEXT: addq %rdx, %rax +; CHECK-NEXT: addq %rdx, %rcx +; CHECK-NEXT: addq %r8, %rcx ; CHECK-NEXT: addq %rcx, %rax -; CHECK-NEXT: addq %r8, %rax ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq Index: test/CodeGen/X86/twoaddr-lea.ll =================================================================== --- test/CodeGen/X86/twoaddr-lea.ll +++ test/CodeGen/X86/twoaddr-lea.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ;; X's live range extends beyond the shift, so the register allocator ;; cannot coalesce it with Y. Because of this, a copy needs to be ;; emitted before the shift to save the register value before it is @@ -11,8 +12,13 @@ define i32 @test1(i32 %X) nounwind { ; CHECK-LABEL: test1: -; CHECK: movl %edi, %eax -; CHECK: leal 1(%rax) +; CHECK: ## %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: leal 1(%rax), %ecx +; CHECK-NEXT: movq _G@{{.*}}(%rip), %rdx +; CHECK-NEXT: movl %ecx, (%rdx) +; CHECK-NEXT: ## kill: def $eax killed $eax killed $rax +; CHECK-NEXT: retq %Z = add i32 %X, 1 store volatile i32 %Z, i32* @G ret i32 %X @@ -22,12 +28,15 @@ ; The second add should not be transformed to leal nor should it be ; commutted (which would require inserting a copy). 
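The regenerated checks in the hunks above (swift-return.ll is the clearest case) show chains of dependent adds being rebalanced so that independent additions can execute in parallel. As a rough illustration only, here is a minimal C++ sketch of that rebalancing; the function names are hypothetical and the snippet is not taken from this patch:

    #include <cstdio>

    // As written in source: three adds in a serial dependence chain,
    // so the critical path is three add latencies.
    static int serial_chain(int a, int b, int c, int d) {
      return ((a + b) + c) + d;
    }

    // Rebalanced form: (a + b) and (c + d) do not depend on each other,
    // so the critical path shrinks to two add latencies.
    static int rebalanced(int a, int b, int c, int d) {
      return (a + b) + (c + d);
    }

    int main() {
      // Both orders produce the same value, which is what makes the
      // rebalancing legal for integer adds.
      std::printf("%d %d\n", serial_chain(1, 2, 3, 4), rebalanced(1, 2, 3, 4));
      return 0;
    }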
define i32 @test2(i32 inreg %a, i32 inreg %b, i32 %c, i32 %d) nounwind { -entry: ; CHECK-LABEL: test2: -; CHECK: leal -; CHECK-NEXT: addl -; CHECK-NEXT: addl -; CHECK-NEXT: ret +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: ## kill: def $esi killed $esi def $rsi +; CHECK-NEXT: ## kill: def $edi killed $edi def $rdi +; CHECK-NEXT: addl %edx, %edi +; CHECK-NEXT: addl %ecx, %edi +; CHECK-NEXT: leal (%rdi,%rsi), %eax +; CHECK-NEXT: retq +entry: %add = add i32 %b, %a %add3 = add i32 %add, %c %add5 = add i32 %add3, %d @@ -36,11 +45,11 @@ ; rdar://9002648 define i64 @test3(i64 %x) nounwind readnone ssp { -entry: ; CHECK-LABEL: test3: -; CHECK: leaq (%rdi,%rdi), %rax -; CHECK-NOT: addq -; CHECK-NEXT: ret +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: leaq (%rdi,%rdi), %rax +; CHECK-NEXT: retq +entry: %0 = shl i64 %x, 1 ret i64 %0 } @@ -52,6 +61,59 @@ ; assert/crash from http://llvm.org/PR28301 ; CHECK-LABEL: ham define void @ham() { +; CHECK-LABEL: ham: +; CHECK: ## %bb.0: ## %bb +; CHECK-NEXT: xorl %r8d, %r8d +; CHECK-NEXT: movq _global@{{.*}}(%rip), %rdx +; CHECK-NEXT: movq _global2@{{.*}}(%rip), %rsi +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: cmpl $10, %eax +; CHECK-NEXT: jle LBB3_2 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB3_6: ## %bb2 +; CHECK-NEXT: ## =>This Loop Header: Depth=1 +; CHECK-NEXT: ## Child Loop BB3_7 Depth 2 +; CHECK-NEXT: movl (%rdx), %edi +; CHECK-NEXT: leal (%rdi,%rax), %ecx +; CHECK-NEXT: movslq %ecx, %rcx +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB3_7: ## %bb6 +; CHECK-NEXT: ## Parent Loop BB3_6 Depth=1 +; CHECK-NEXT: ## => This Inner Loop Header: Depth=2 +; CHECK-NEXT: movq %rax, (%rsi) +; CHECK-NEXT: movq %rcx, (%rsi) +; CHECK-NEXT: movl %edi, (%rdx) +; CHECK-NEXT: testb %r8b, %r8b +; CHECK-NEXT: jne LBB3_7 +; CHECK-NEXT: ## %bb.8: ## %bb9 +; CHECK-NEXT: ## in Loop: Header=BB3_6 Depth=1 +; CHECK-NEXT: addq $4, %rax +; CHECK-NEXT: cmpl $10, %eax +; CHECK-NEXT: jg LBB3_6 +; CHECK-NEXT: LBB3_2: ## %bb3.preheader +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB3_3: ## %bb3 +; CHECK-NEXT: ## =>This Loop Header: Depth=1 +; CHECK-NEXT: ## Child Loop BB3_4 Depth 2 +; CHECK-NEXT: movl %eax, %edx +; CHECK-NEXT: subl %ecx, %edx +; CHECK-NEXT: movq %rcx, %rsi +; CHECK-NEXT: addq $4, %rcx +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB3_4: ## %bb4 +; CHECK-NEXT: ## Parent Loop BB3_3 Depth=1 +; CHECK-NEXT: ## => This Inner Loop Header: Depth=2 +; CHECK-NEXT: testl %edx, %edx +; CHECK-NEXT: jne LBB3_9 +; CHECK-NEXT: ## %bb.5: ## %bb5 +; CHECK-NEXT: ## in Loop: Header=BB3_4 Depth=2 +; CHECK-NEXT: incq %rsi +; CHECK-NEXT: cmpq %rcx, %rsi +; CHECK-NEXT: jl LBB3_4 +; CHECK-NEXT: jmp LBB3_3 +; CHECK-NEXT: LBB3_9: ## %bb8 +; CHECK-NEXT: ud2 bb: br label %bb1 @@ -68,8 +130,6 @@ br label %bb6 bb3: -; CHECK: subl %e[[REG0:[a-z0-9]+]], -; CHECK: addq $4, %r[[REG0]] %tmp14 = phi i64 [ %tmp15, %bb5 ], [ 0, %bb1 ] %tmp15 = add nuw i64 %tmp14, 4 %tmp16 = trunc i64 %tmp14 to i32 Index: test/CodeGen/X86/umul_fix.ll =================================================================== --- test/CodeGen/X86/umul_fix.ll +++ test/CodeGen/X86/umul_fix.ll @@ -188,8 +188,8 @@ ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: mull %esi ; X86-NEXT: imull {{[0-9]+}}(%esp), %ecx -; X86-NEXT: addl %ecx, %edx ; X86-NEXT: imull {{[0-9]+}}(%esp), %esi +; X86-NEXT: addl %ecx, %esi ; X86-NEXT: addl %esi, %edx ; X86-NEXT: popl %esi ; X86-NEXT: retl Index: test/CodeGen/X86/vp2intersect_multiple_pairs.ll =================================================================== --- 
test/CodeGen/X86/vp2intersect_multiple_pairs.ll +++ test/CodeGen/X86/vp2intersect_multiple_pairs.ll @@ -5,11 +5,6 @@ ; Test with more than four live mask pairs define void @test(<16 x i32> %a0, <16 x i32> %b0, - <16 x i32> %a1, <16 x i32> %b1, - <16 x i32> %a2, <16 x i32> %b2, - <16 x i32> %a3, <16 x i32> %b3, - <16 x i32> %a4, <16 x i32> %b4, - i16* nocapture %m0, i16* nocapture %m1) { ; X86-LABEL: test: ; X86: # %bb.0: # %entry ; X86-NEXT: pushl %ebp @@ -56,12 +51,22 @@ ; X86-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k0 # 4-byte Folded Reload ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ; X86-NEXT: kmovw %k0, %edi +; X86-NEXT: addl %eax, %ecx +; X86-NEXT: kmovw %k1, %eax +; X86-NEXT: addl %edx, %eax +; X86-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k0 # 4-byte Folded Reload +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 +; X86-NEXT: kmovw %k0, %edx +; X86-NEXT: addl %edx, %eax +; X86-NEXT: addl %ecx, %eax ; X86-NEXT: addl %edi, %eax -; X86-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k2 # 4-byte Folded Reload -; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k3 -; X86-NEXT: kmovw %k2, %edi -; X86-NEXT: addl %ecx, %edx -; X86-NEXT: kmovw %k1, %ecx +; X86-NEXT: movw %ax, (%esi) +; X86-NEXT: leal -8(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebp +; X86-NEXT: .cfi_def_cfa %esp, 4 +; X86-NEXT: retl ; ; X64-LABEL: test: ; X64: # %bb.0: # %entry @@ -111,6 +116,23 @@ ; X64-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ; X64-NEXT: kmovw %k0, %edi ; X64-NEXT: kmovw %k1, %ebx +; X64-NEXT: addl %ecx, %edx +; X64-NEXT: leal (%rbx,%rsi), %ecx +; X64-NEXT: addl %eax, %ecx +; X64-NEXT: addl %edx, %ecx +; X64-NEXT: addl %edi, %ecx +; X64-NEXT: movw %cx, (%r14) +; X64-NEXT: leaq -16(%rbp), %rsp +; X64-NEXT: popq %rbx +; X64-NEXT: popq %r14 +; X64-NEXT: popq %rbp +; X64-NEXT: .cfi_def_cfa %rsp, 8 +; X64-NEXT: retq + <16 x i32> %a1, <16 x i32> %b1, + <16 x i32> %a2, <16 x i32> %b2, + <16 x i32> %a3, <16 x i32> %b3, + <16 x i32> %a4, <16 x i32> %b4, + i16* nocapture %m0, i16* nocapture %m1) { entry: %0 = call { <16 x i1>, <16 x i1> } @llvm.x86.avx512.vp2intersect.d.512(<16 x i32> %a0, <16 x i32> %b0) %1 = call { <16 x i1>, <16 x i1> } @llvm.x86.avx512.vp2intersect.d.512(<16 x i32> %a1, <16 x i32> %b1) Index: test/CodeGen/X86/win-smallparams.ll =================================================================== --- test/CodeGen/X86/win-smallparams.ll +++ test/CodeGen/X86/win-smallparams.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; When we accept small parameters on Windows, make sure we do not assume they ; are zero or sign extended in memory or in registers. 
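To illustrate the property this test pins down, here is a hedged C++ sketch, loosely modeled on the test's manyargs and not itself part of the patch: because the high bits of a register holding an i8 or i16 argument are unspecified under these conventions, the callee must re-extend every small argument before using it, which is what the movsbl/movswl/movzbl instructions in the WIN64 checks below do.

    #include <cstdint>
    #include <cstdio>

    // Each explicit cast stands in for the sign or zero extension the
    // compiled callee must perform itself; it may not assume the caller
    // already extended the values in registers or in memory.
    int manyargs(int8_t a, int16_t b, int8_t c, int16_t d, int8_t e, int16_t f) {
      return static_cast<int32_t>(a) + static_cast<int32_t>(b) +
             static_cast<int32_t>(c) + static_cast<int32_t>(d) +
             static_cast<int32_t>(e) + static_cast<int32_t>(f);
    }

    int main() {
      std::printf("%d\n", manyargs(1, 2, 3, 4, 5, 6)); // prints 21
      return 0;
    }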
@@ -7,12 +8,86 @@ ; RUN: llc < %s -mtriple=i686-windows-gnu | FileCheck %s --check-prefix=WIN32 define void @call() { +; WIN64-LABEL: call: +; WIN64: # %bb.0: # %entry +; WIN64-NEXT: subq $56, %rsp +; WIN64-NEXT: .seh_stackalloc 56 +; WIN64-NEXT: .seh_endprologue +; WIN64-NEXT: movw $6, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movb $5, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movb $1, %cl +; WIN64-NEXT: movw $2, %dx +; WIN64-NEXT: movb $3, %r8b +; WIN64-NEXT: movw $4, %r9w +; WIN64-NEXT: callq manyargs +; WIN64-NEXT: nop +; WIN64-NEXT: addq $56, %rsp +; WIN64-NEXT: retq +; WIN64-NEXT: .seh_handlerdata +; WIN64-NEXT: .text +; WIN64-NEXT: .seh_endproc +; +; WIN32-LABEL: call: +; WIN32: # %bb.0: # %entry +; WIN32-NEXT: pushl $6 +; WIN32-NEXT: .cfi_adjust_cfa_offset 4 +; WIN32-NEXT: pushl $5 +; WIN32-NEXT: .cfi_adjust_cfa_offset 4 +; WIN32-NEXT: pushl $4 +; WIN32-NEXT: .cfi_adjust_cfa_offset 4 +; WIN32-NEXT: pushl $3 +; WIN32-NEXT: .cfi_adjust_cfa_offset 4 +; WIN32-NEXT: pushl $2 +; WIN32-NEXT: .cfi_adjust_cfa_offset 4 +; WIN32-NEXT: pushl $1 +; WIN32-NEXT: .cfi_adjust_cfa_offset 4 +; WIN32-NEXT: calll _manyargs +; WIN32-NEXT: addl $24, %esp +; WIN32-NEXT: .cfi_adjust_cfa_offset -24 +; WIN32-NEXT: retl entry: %rv = call i32 @manyargs(i8 1, i16 2, i8 3, i16 4, i8 5, i16 6) ret void } define i32 @manyargs(i8 %a, i16 %b, i8 %c, i16 %d, i8 %e, i16 %f) { +; WIN64-LABEL: manyargs: +; WIN64: # %bb.0: # %entry +; WIN64-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d +; WIN64-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d +; WIN64-NEXT: movsbl %cl, %ecx +; WIN64-NEXT: movswl %dx, %eax +; WIN64-NEXT: movzbl %r8b, %edx +; WIN64-NEXT: addl %eax, %edx +; WIN64-NEXT: movzwl %r9w, %eax +; WIN64-NEXT: addl %edx, %eax +; WIN64-NEXT: addl %r11d, %eax +; WIN64-NEXT: addl %r10d, %eax +; WIN64-NEXT: addl %ecx, %eax +; WIN64-NEXT: retq +; +; WIN32-LABEL: manyargs: +; WIN32: # %bb.0: # %entry +; WIN32-NEXT: pushl %edi +; WIN32-NEXT: .cfi_def_cfa_offset 8 +; WIN32-NEXT: pushl %esi +; WIN32-NEXT: .cfi_def_cfa_offset 12 +; WIN32-NEXT: .cfi_offset %esi, -12 +; WIN32-NEXT: .cfi_offset %edi, -8 +; WIN32-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; WIN32-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; WIN32-NEXT: movzbl {{[0-9]+}}(%esp), %esi +; WIN32-NEXT: movswl {{[0-9]+}}(%esp), %edi +; WIN32-NEXT: addl %esi, %edi +; WIN32-NEXT: addl %edx, %edi +; WIN32-NEXT: addl %ecx, %edi +; WIN32-NEXT: addl %eax, %edi +; WIN32-NEXT: movsbl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: addl %edi, %eax +; WIN32-NEXT: popl %esi +; WIN32-NEXT: popl %edi +; WIN32-NEXT: retl entry: %aa = sext i8 %a to i32 %bb = sext i16 %b to i32 Index: test/CodeGen/X86/x86-no_caller_saved_registers-preserve.ll =================================================================== --- test/CodeGen/X86/x86-no_caller_saved_registers-preserve.ll +++ test/CodeGen/X86/x86-no_caller_saved_registers-preserve.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py for function "bar" +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=x86_64-unknown-unknown < %s | FileCheck %s ;; In functions with 'no_caller_saved_registers' attribute, all registers should @@ -12,13 +12,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: pushq %rdx ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) # 16-byte Spill +; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-NEXT: .cfi_offset %rdx, -16 ; CHECK-NEXT: .cfi_offset %xmm1, -32 ; CHECK-NEXT: #APP ; CHECK-NEXT: #NO_APP 
; CHECK-NEXT: movl $4, %eax -; CHECK-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload ; CHECK-NEXT: popq %rdx ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq @@ -27,20 +27,26 @@ } ;; Because "bar" has 'no_caller_saved_registers' attribute, function "foo" -;; doesn't need to preserve registers except for the arguments passed +;; doesn't need to preserve registers except for the arguments passed ;; to "bar" (%esi, %edi and %xmm0). define x86_64_sysvcc float @foo(i32 %a0, i32 %a1, float %b0) { -; CHECK-LABEL: foo -; CHECK: movaps %xmm0, %xmm1 -; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: movl %edi, %edx -; CHECK-NEXT: callq bar -; CHECK-NEXT: addl %edx, %eax -; CHECK-NEXT: addl %ecx, %eax -; CHECK-NEXT: xorps %xmm0, %xmm0 -; CHECK-NEXT: cvtsi2ss %eax, %xmm0 -; CHECK-NEXT: addss %xmm0, %xmm1 -; CHECK: retq +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: movaps %xmm0, %xmm1 +; CHECK-NEXT: movl %esi, %ecx +; CHECK-NEXT: movl %edi, %edx +; CHECK-NEXT: callq bar +; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: addl %edx, %eax +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: cvtsi2ss %eax, %xmm0 +; CHECK-NEXT: addss %xmm0, %xmm1 +; CHECK-NEXT: movaps %xmm1, %xmm0 +; CHECK-NEXT: popq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq %call = call i32 @bar(i32 %a0, i32 %a1, float %b0) #0 %c0 = add i32 %a0, %call %c1 = add i32 %c0, %a1 Index: test/DebugInfo/X86/live-debug-variables.ll =================================================================== --- test/DebugInfo/X86/live-debug-variables.ll +++ test/DebugInfo/X86/live-debug-variables.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=x86_64-linux-gnu -filetype=obj -o - %s | llvm-dwarfdump -debug-loc - | FileCheck %s ; The test inlines the function F four times, with each inlined variable for