diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2149,6 +2149,7 @@ setLibcallName(RTLIB::SRL_I128, nullptr); setLibcallName(RTLIB::SRA_I128, nullptr); setLibcallName(RTLIB::MUL_I128, nullptr); + setLibcallName(RTLIB::MULO_I64, nullptr); setLibcallName(RTLIB::MULO_I128, nullptr); } diff --git a/llvm/test/CodeGen/X86/overflow-intrinsic-optimizations.ll b/llvm/test/CodeGen/X86/overflow-intrinsic-optimizations.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/overflow-intrinsic-optimizations.ll @@ -0,0 +1,19 @@ +; RUN: llc %s -mtriple=i386 -o - | FileCheck %s + +define i1 @no__mulodi4(i32 %a, i64 %b, i32* %c) { +; CHECK-LABEL: no__mulodi4 +; CHECK-NOT: calll __mulodi4 +entry: + %0 = sext i32 %a to i64 + %1 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 %0, i64 %b) + %2 = extractvalue { i64, i1 } %1, 1 + %3 = extractvalue { i64, i1 } %1, 0 + %4 = trunc i64 %3 to i32 + %5 = sext i32 %4 to i64 + %6 = icmp ne i64 %3, %5 + %7 = or i1 %2, %6 + store i32 %4, i32* %c, align 4 + ret i1 %7 +} + +declare { i64, i1 } @llvm.smul.with.overflow.i64(i64, i64) diff --git a/llvm/test/CodeGen/X86/smul_fix_sat.ll b/llvm/test/CodeGen/X86/smul_fix_sat.ll --- a/llvm/test/CodeGen/X86/smul_fix_sat.ll +++ b/llvm/test/CodeGen/X86/smul_fix_sat.ll @@ -360,47 +360,93 @@ ; ; X86-LABEL: func5: ; X86: # %bb.0: -; X86-NEXT: pushl %edi +; X86-NEXT: pushl %ebp ; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: pushl %esi +; X86-NEXT: pushl %ebx ; X86-NEXT: .cfi_def_cfa_offset 12 -; X86-NEXT: pushl %eax +; X86-NEXT: pushl %edi ; X86-NEXT: .cfi_def_cfa_offset 16 -; X86-NEXT: .cfi_offset %esi, -12 -; X86-NEXT: .cfi_offset %edi, -8 +; X86-NEXT: pushl %esi +; X86-NEXT: .cfi_def_cfa_offset 20 +; X86-NEXT: subl $8, %esp +; X86-NEXT: .cfi_def_cfa_offset 28 +; X86-NEXT: .cfi_offset %esi, -20 +; X86-NEXT: .cfi_offset %edi, -16 +; X86-NEXT: .cfi_offset %ebx, -12 +; X86-NEXT: .cfi_offset %ebp, -8 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl $0, (%esp) -; X86-NEXT: movl %esp, %edi -; X86-NEXT: pushl %edi -; X86-NEXT: .cfi_adjust_cfa_offset 4 -; X86-NEXT: pushl %esi -; X86-NEXT: .cfi_adjust_cfa_offset 4 -; X86-NEXT: pushl %edx -; X86-NEXT: .cfi_adjust_cfa_offset 4 -; X86-NEXT: pushl %ecx -; X86-NEXT: .cfi_adjust_cfa_offset 4 -; X86-NEXT: pushl %eax -; X86-NEXT: .cfi_adjust_cfa_offset 4 -; X86-NEXT: calll __mulodi4 -; X86-NEXT: addl $20, %esp -; X86-NEXT: .cfi_adjust_cfa_offset -20 -; X86-NEXT: xorl %ecx, %ecx -; X86-NEXT: testl %edx, %edx -; X86-NEXT: setns %cl -; X86-NEXT: addl $2147483647, %ecx # imm = 0x7FFFFFFF -; X86-NEXT: movl %edx, %esi +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: movl %edx, %ebp +; X86-NEXT: sarl $31, %ebx +; X86-NEXT: movl %eax, %esi +; X86-NEXT: imull %ebx, %esi +; X86-NEXT: mull %ebx +; X86-NEXT: movl %eax, %edi +; X86-NEXT: addl %esi, %edx +; X86-NEXT: movl %ecx, %esi +; X86-NEXT: imull %ecx, %ebx +; X86-NEXT: addl %edx, %ebx ; X86-NEXT: sarl $31, %esi -; X86-NEXT: cmpl $0, (%esp) -; X86-NEXT: cmovnel %esi, %eax -; X86-NEXT: cmovnel %ecx, %edx -; X86-NEXT: addl $4, %esp -; X86-NEXT: .cfi_def_cfa_offset 12 +; X86-NEXT: movl %esi, %ecx +; X86-NEXT: imull %ebp, %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl %esi, %eax +; X86-NEXT: mull %ebp +; X86-NEXT: addl %ecx, %edx +; X86-NEXT: imull %ebp, %esi +; X86-NEXT: addl %edx, %esi +; X86-NEXT: addl %edi, %eax +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NEXT: adcl %ebx, %esi +; X86-NEXT: movl %ebp, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: mull %ecx +; X86-NEXT: movl %edx, %edi +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: mull %ecx +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: addl %edi, %ebx +; X86-NEXT: adcl $0, %ecx +; X86-NEXT: movl %ebp, %eax +; X86-NEXT: mull {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, %edi +; X86-NEXT: movl %eax, %ebp +; X86-NEXT: addl %ebx, %ebp +; X86-NEXT: adcl %ecx, %edi +; X86-NEXT: setb %bl +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: mull {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: addl %edi, %eax +; X86-NEXT: movzbl %bl, %edx +; X86-NEXT: adcl %edx, %ecx +; X86-NEXT: addl (%esp), %eax # 4-byte Folded Reload +; X86-NEXT: adcl %esi, %ecx +; X86-NEXT: movl %ebp, %esi +; X86-NEXT: sarl $31, %esi +; X86-NEXT: xorl %esi, %ecx +; X86-NEXT: xorl %esi, %eax +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: testl %ebp, %ebp +; X86-NEXT: setns %dl +; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: cmovel %ebp, %edx +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $8, %esp +; X86-NEXT: .cfi_def_cfa_offset 20 ; X86-NEXT: popl %esi -; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_def_cfa_offset 16 ; X86-NEXT: popl %edi +; X86-NEXT: .cfi_def_cfa_offset 12 +; X86-NEXT: popl %ebx +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: popl %ebp ; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl %tmp = call i64 @llvm.smul.fix.sat.i64(i64 %x, i64 %y, i32 0) diff --git a/llvm/test/CodeGen/X86/xmulo.ll b/llvm/test/CodeGen/X86/xmulo.ll --- a/llvm/test/CodeGen/X86/xmulo.ll +++ b/llvm/test/CodeGen/X86/xmulo.ll @@ -208,33 +208,78 @@ ; ; WIN32-LABEL: smuloi64: ; WIN32: # %bb.0: +; WIN32-NEXT: pushl %ebp ; WIN32-NEXT: pushl %ebx ; WIN32-NEXT: pushl %edi ; WIN32-NEXT: pushl %esi -; WIN32-NEXT: pushl %eax -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi +; WIN32-NEXT: subl $8, %esp ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi -; WIN32-NEXT: movl $0, (%esp) -; WIN32-NEXT: movl %esp, %ebx -; WIN32-NEXT: pushl %ebx -; WIN32-NEXT: pushl %edi -; WIN32-NEXT: pushl %edx -; WIN32-NEXT: pushl %ecx -; WIN32-NEXT: pushl %eax -; WIN32-NEXT: calll ___mulodi4 -; WIN32-NEXT: addl $20, %esp -; WIN32-NEXT: cmpl $0, (%esp) -; WIN32-NEXT: setne %cl -; WIN32-NEXT: movl %edx, 4(%esi) -; WIN32-NEXT: movl %eax, (%esi) -; WIN32-NEXT: movl %ecx, %eax -; WIN32-NEXT: addl $4, %esp +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx +; WIN32-NEXT: movl %edx, %ecx +; WIN32-NEXT: movl %edx, %ebx +; WIN32-NEXT: sarl $31, %ecx +; WIN32-NEXT: movl %eax, %esi +; WIN32-NEXT: imull %ecx, %esi +; WIN32-NEXT: mull %ecx +; WIN32-NEXT: movl %eax, %ebp +; WIN32-NEXT: addl %esi, %edx +; WIN32-NEXT: movl %edi, %esi +; WIN32-NEXT: imull %edi, %ecx +; WIN32-NEXT: addl %edx, %ecx +; WIN32-NEXT: sarl $31, %esi +; WIN32-NEXT: movl %esi, %edi +; WIN32-NEXT: imull %ebx, %edi +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx +; WIN32-NEXT: movl %esi, %eax +; WIN32-NEXT: mull %ebx +; WIN32-NEXT: addl %edi, %edx +; WIN32-NEXT: imull %ebx, %esi +; WIN32-NEXT: addl %edx, %esi +; WIN32-NEXT: addl %ebp, %eax +; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill +; WIN32-NEXT: adcl %ecx, %esi +; WIN32-NEXT: movl %ebx, %eax +; WIN32-NEXT: movl %ebx, %edi +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; WIN32-NEXT: mull %ecx +; WIN32-NEXT: movl %edx, %ebx +; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: mull %ecx +; WIN32-NEXT: movl %edx, %ebp +; WIN32-NEXT: movl %eax, %ecx +; WIN32-NEXT: addl %ebx, %ecx +; WIN32-NEXT: adcl $0, %ebp +; WIN32-NEXT: movl %edi, %eax +; WIN32-NEXT: mull {{[0-9]+}}(%esp) +; WIN32-NEXT: movl %edx, %ebx +; WIN32-NEXT: movl %eax, %edi +; WIN32-NEXT: addl %ecx, %edi +; WIN32-NEXT: adcl %ebp, %ebx +; WIN32-NEXT: setb %cl +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: mull {{[0-9]+}}(%esp) +; WIN32-NEXT: addl %ebx, %eax +; WIN32-NEXT: movzbl %cl, %ecx +; WIN32-NEXT: adcl %ecx, %edx +; WIN32-NEXT: addl (%esp), %eax # 4-byte Folded Reload +; WIN32-NEXT: adcl %esi, %edx +; WIN32-NEXT: movl %edi, %ecx +; WIN32-NEXT: sarl $31, %ecx +; WIN32-NEXT: xorl %ecx, %edx +; WIN32-NEXT: xorl %eax, %ecx +; WIN32-NEXT: orl %edx, %ecx +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: movl %edi, 4(%eax) +; WIN32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; WIN32-NEXT: movl %ecx, (%eax) +; WIN32-NEXT: setne %al +; WIN32-NEXT: addl $8, %esp ; WIN32-NEXT: popl %esi ; WIN32-NEXT: popl %edi ; WIN32-NEXT: popl %ebx +; WIN32-NEXT: popl %ebp ; WIN32-NEXT: retl %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2) %val = extractvalue {i64, i1} %t, 0 @@ -528,27 +573,68 @@ ; WIN32-NEXT: pushl %edi ; WIN32-NEXT: pushl %esi ; WIN32-NEXT: pushl %eax -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: movl %eax, %ecx +; WIN32-NEXT: movl %eax, %esi +; WIN32-NEXT: sarl $31, %ecx +; WIN32-NEXT: movl %ebp, %edi +; WIN32-NEXT: imull %ecx, %edi +; WIN32-NEXT: movl %ebp, %eax +; WIN32-NEXT: mull %ecx +; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill +; WIN32-NEXT: addl %edi, %edx +; WIN32-NEXT: imull %ebx, %ecx +; WIN32-NEXT: addl %edx, %ecx +; WIN32-NEXT: sarl $31, %ebx +; WIN32-NEXT: movl %ebx, %edi +; WIN32-NEXT: imull %esi, %edi +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi +; WIN32-NEXT: movl %ebx, %eax +; WIN32-NEXT: mull %esi +; WIN32-NEXT: addl %edi, %edx +; WIN32-NEXT: movl %esi, %edi +; WIN32-NEXT: imull %esi, %ebx +; WIN32-NEXT: addl %edx, %ebx +; WIN32-NEXT: addl (%esp), %eax # 4-byte Folded Reload +; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill +; WIN32-NEXT: adcl %ecx, %ebx +; WIN32-NEXT: movl %edi, %eax +; WIN32-NEXT: mull %ebp +; WIN32-NEXT: movl %edx, %esi +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: mull %ebp +; WIN32-NEXT: movl %edx, %ebp +; WIN32-NEXT: movl %eax, %ecx +; WIN32-NEXT: addl %esi, %ecx +; WIN32-NEXT: adcl $0, %ebp +; WIN32-NEXT: movl %edi, %eax +; WIN32-NEXT: mull {{[0-9]+}}(%esp) +; WIN32-NEXT: movl %edx, %esi +; WIN32-NEXT: movl %eax, %edi +; WIN32-NEXT: addl %ecx, %edi +; WIN32-NEXT: adcl %ebp, %esi ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp -; WIN32-NEXT: movl $0, (%esp) -; WIN32-NEXT: movl %esp, %eax -; WIN32-NEXT: pushl %eax -; WIN32-NEXT: pushl %ebp -; WIN32-NEXT: pushl %ebx -; WIN32-NEXT: pushl %edi -; WIN32-NEXT: pushl %esi -; WIN32-NEXT: calll ___mulodi4 -; WIN32-NEXT: addl $20, %esp -; WIN32-NEXT: cmpl $0, (%esp) +; WIN32-NEXT: setb %cl +; WIN32-NEXT: movl %ebp, %eax +; WIN32-NEXT: mull {{[0-9]+}}(%esp) +; WIN32-NEXT: addl %esi, %eax +; WIN32-NEXT: movzbl %cl, %ecx +; WIN32-NEXT: adcl %ecx, %edx +; WIN32-NEXT: addl (%esp), %eax # 4-byte Folded Reload +; WIN32-NEXT: adcl %ebx, %edx +; WIN32-NEXT: sarl $31, %edi +; WIN32-NEXT: xorl %edi, %edx +; WIN32-NEXT: xorl %eax, %edi +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: orl %edx, %edi ; WIN32-NEXT: jne LBB12_2 ; WIN32-NEXT: # %bb.1: -; WIN32-NEXT: movl %ebx, %esi -; WIN32-NEXT: movl %ebp, %edi +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp ; WIN32-NEXT: LBB12_2: -; WIN32-NEXT: movl %esi, %eax -; WIN32-NEXT: movl %edi, %edx +; WIN32-NEXT: movl %ebp, %edx ; WIN32-NEXT: addl $4, %esp ; WIN32-NEXT: popl %esi ; WIN32-NEXT: popl %edi @@ -904,23 +990,66 @@ ; ; WIN32-LABEL: smulobri64: ; WIN32: # %bb.0: +; WIN32-NEXT: pushl %ebp +; WIN32-NEXT: pushl %ebx ; WIN32-NEXT: pushl %edi ; WIN32-NEXT: pushl %esi ; WIN32-NEXT: pushl %eax ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi -; WIN32-NEXT: movl $0, (%esp) -; WIN32-NEXT: movl %esp, %edi -; WIN32-NEXT: pushl %edi -; WIN32-NEXT: pushl %esi -; WIN32-NEXT: pushl %edx -; WIN32-NEXT: pushl %ecx -; WIN32-NEXT: pushl %eax -; WIN32-NEXT: calll ___mulodi4 -; WIN32-NEXT: addl $20, %esp -; WIN32-NEXT: cmpl $0, (%esp) +; WIN32-NEXT: movl %edx, %ecx +; WIN32-NEXT: movl %edx, %ebx +; WIN32-NEXT: sarl $31, %ecx +; WIN32-NEXT: movl %eax, %esi +; WIN32-NEXT: imull %ecx, %esi +; WIN32-NEXT: mull %ecx +; WIN32-NEXT: movl %eax, %ebp +; WIN32-NEXT: addl %esi, %edx +; WIN32-NEXT: movl %edi, %esi +; WIN32-NEXT: imull %edi, %ecx +; WIN32-NEXT: addl %edx, %ecx +; WIN32-NEXT: sarl $31, %esi +; WIN32-NEXT: movl %esi, %edi +; WIN32-NEXT: imull %ebx, %edi +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx +; WIN32-NEXT: movl %esi, %eax +; WIN32-NEXT: mull %ebx +; WIN32-NEXT: addl %edi, %edx +; WIN32-NEXT: imull %ebx, %esi +; WIN32-NEXT: addl %edx, %esi +; WIN32-NEXT: addl %ebp, %eax +; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill +; WIN32-NEXT: adcl %ecx, %esi +; WIN32-NEXT: movl %ebx, %eax +; WIN32-NEXT: movl %ebx, %edi +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; WIN32-NEXT: mull %ecx +; WIN32-NEXT: movl %edx, %ebp +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: mull %ecx +; WIN32-NEXT: movl %edx, %ebx +; WIN32-NEXT: movl %eax, %ecx +; WIN32-NEXT: addl %ebp, %ecx +; WIN32-NEXT: adcl $0, %ebx +; WIN32-NEXT: movl %edi, %eax +; WIN32-NEXT: mull {{[0-9]+}}(%esp) +; WIN32-NEXT: movl %edx, %ebp +; WIN32-NEXT: movl %eax, %edi +; WIN32-NEXT: addl %ecx, %edi +; WIN32-NEXT: adcl %ebx, %ebp +; WIN32-NEXT: setb %cl +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: mull {{[0-9]+}}(%esp) +; WIN32-NEXT: addl %ebp, %eax +; WIN32-NEXT: movzbl %cl, %ecx +; WIN32-NEXT: adcl %ecx, %edx +; WIN32-NEXT: addl (%esp), %eax # 4-byte Folded Reload +; WIN32-NEXT: adcl %esi, %edx +; WIN32-NEXT: sarl $31, %edi +; WIN32-NEXT: xorl %edi, %edx +; WIN32-NEXT: xorl %eax, %edi +; WIN32-NEXT: orl %edx, %edi ; WIN32-NEXT: jne LBB18_1 ; WIN32-NEXT: # %bb.3: # %continue ; WIN32-NEXT: movb $1, %al @@ -928,6 +1057,8 @@ ; WIN32-NEXT: addl $4, %esp ; WIN32-NEXT: popl %esi ; WIN32-NEXT: popl %edi +; WIN32-NEXT: popl %ebx +; WIN32-NEXT: popl %ebp ; WIN32-NEXT: retl ; WIN32-NEXT: LBB18_1: # %overflow ; WIN32-NEXT: xorl %eax, %eax @@ -1567,34 +1698,79 @@ ; ; WIN32-LABEL: smuloi64_load: ; WIN32: # %bb.0: +; WIN32-NEXT: pushl %ebp ; WIN32-NEXT: pushl %ebx ; WIN32-NEXT: pushl %edi ; WIN32-NEXT: pushl %esi -; WIN32-NEXT: pushl %eax -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi +; WIN32-NEXT: subl $16, %esp +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; WIN32-NEXT: movl (%eax), %esi +; WIN32-NEXT: movl 4(%eax), %ebp +; WIN32-NEXT: sarl $31, %ebx +; WIN32-NEXT: movl %ebx, %ecx +; WIN32-NEXT: imull %ebp, %ecx +; WIN32-NEXT: movl %ebx, %eax +; WIN32-NEXT: mull %esi +; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; WIN32-NEXT: addl %ecx, %edx +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: imull %esi, %ebx +; WIN32-NEXT: addl %edx, %ebx +; WIN32-NEXT: movl %ebp, %ecx +; WIN32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; WIN32-NEXT: sarl $31, %ecx +; WIN32-NEXT: movl %eax, %edi +; WIN32-NEXT: imull %ecx, %edi +; WIN32-NEXT: mull %ecx +; WIN32-NEXT: addl %edi, %edx +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi +; WIN32-NEXT: imull %edi, %ecx +; WIN32-NEXT: addl %edx, %ecx +; WIN32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; WIN32-NEXT: adcl %ebx, %ecx +; WIN32-NEXT: movl %esi, %eax +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi +; WIN32-NEXT: mull %edi +; WIN32-NEXT: movl %edx, %ebx +; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; WIN32-NEXT: movl %ebp, %eax +; WIN32-NEXT: mull %edi +; WIN32-NEXT: movl %edx, %ebp +; WIN32-NEXT: movl %eax, %edi +; WIN32-NEXT: addl %ebx, %edi +; WIN32-NEXT: adcl $0, %ebp +; WIN32-NEXT: movl %esi, %eax ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx -; WIN32-NEXT: movl (%edx), %edi -; WIN32-NEXT: movl 4(%edx), %edx -; WIN32-NEXT: movl $0, (%esp) -; WIN32-NEXT: movl %esp, %ebx -; WIN32-NEXT: pushl %ebx -; WIN32-NEXT: pushl %ecx -; WIN32-NEXT: pushl %eax -; WIN32-NEXT: pushl %edx -; WIN32-NEXT: pushl %edi -; WIN32-NEXT: calll ___mulodi4 -; WIN32-NEXT: addl $20, %esp -; WIN32-NEXT: cmpl $0, (%esp) -; WIN32-NEXT: setne %cl -; WIN32-NEXT: movl %eax, (%esi) -; WIN32-NEXT: movl %edx, 4(%esi) -; WIN32-NEXT: movl %ecx, %eax -; WIN32-NEXT: addl $4, %esp +; WIN32-NEXT: mull %edx +; WIN32-NEXT: movl %edx, %esi +; WIN32-NEXT: movl %eax, %ebx +; WIN32-NEXT: addl %edi, %ebx +; WIN32-NEXT: adcl %ebp, %esi +; WIN32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; WIN32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; WIN32-NEXT: mull {{[0-9]+}}(%esp) +; WIN32-NEXT: addl %esi, %eax +; WIN32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload +; WIN32-NEXT: adcl %esi, %edx +; WIN32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; WIN32-NEXT: adcl %ecx, %edx +; WIN32-NEXT: movl %ebx, %ecx +; WIN32-NEXT: sarl $31, %ecx +; WIN32-NEXT: xorl %ecx, %edx +; WIN32-NEXT: xorl %eax, %ecx +; WIN32-NEXT: orl %edx, %ecx +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: movl %ebx, 4(%eax) +; WIN32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; WIN32-NEXT: movl %ecx, (%eax) +; WIN32-NEXT: setne %al +; WIN32-NEXT: addl $16, %esp ; WIN32-NEXT: popl %esi ; WIN32-NEXT: popl %edi ; WIN32-NEXT: popl %ebx +; WIN32-NEXT: popl %ebp ; WIN32-NEXT: retl %v1 = load i64, i64* %ptr1 %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2) @@ -1630,34 +1806,80 @@ ; ; WIN32-LABEL: smuloi64_load2: ; WIN32: # %bb.0: +; WIN32-NEXT: pushl %ebp ; WIN32-NEXT: pushl %ebx ; WIN32-NEXT: pushl %edi ; WIN32-NEXT: pushl %esi -; WIN32-NEXT: pushl %eax -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax -; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; WIN32-NEXT: subl $12, %esp ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx -; WIN32-NEXT: movl (%edx), %edi -; WIN32-NEXT: movl 4(%edx), %edx -; WIN32-NEXT: movl $0, (%esp) -; WIN32-NEXT: movl %esp, %ebx -; WIN32-NEXT: pushl %ebx -; WIN32-NEXT: pushl %edx -; WIN32-NEXT: pushl %edi -; WIN32-NEXT: pushl %ecx -; WIN32-NEXT: pushl %eax -; WIN32-NEXT: calll ___mulodi4 -; WIN32-NEXT: addl $20, %esp -; WIN32-NEXT: cmpl $0, (%esp) -; WIN32-NEXT: setne %cl -; WIN32-NEXT: movl %eax, (%esi) -; WIN32-NEXT: movl %edx, 4(%esi) -; WIN32-NEXT: movl %ecx, %eax -; WIN32-NEXT: addl $4, %esp +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: movl (%eax), %ebp +; WIN32-NEXT: movl 4(%eax), %edi +; WIN32-NEXT: movl %edx, %ecx +; WIN32-NEXT: movl %edx, %ebx +; WIN32-NEXT: sarl $31, %ecx +; WIN32-NEXT: movl %ebp, %esi +; WIN32-NEXT: imull %ecx, %esi +; WIN32-NEXT: movl %ebp, %eax +; WIN32-NEXT: mull %ecx +; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill +; WIN32-NEXT: addl %esi, %edx +; WIN32-NEXT: movl %edi, %esi +; WIN32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; WIN32-NEXT: imull %edi, %ecx +; WIN32-NEXT: addl %edx, %ecx +; WIN32-NEXT: sarl $31, %esi +; WIN32-NEXT: movl %esi, %edi +; WIN32-NEXT: imull %ebx, %edi +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx +; WIN32-NEXT: movl %esi, %eax +; WIN32-NEXT: mull %ebx +; WIN32-NEXT: addl %edi, %edx +; WIN32-NEXT: imull %ebx, %esi +; WIN32-NEXT: addl %edx, %esi +; WIN32-NEXT: addl (%esp), %eax # 4-byte Folded Reload +; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; WIN32-NEXT: adcl %ecx, %esi +; WIN32-NEXT: movl %ebx, %eax +; WIN32-NEXT: movl %ebx, %edi +; WIN32-NEXT: mull %ebp +; WIN32-NEXT: movl %edx, %ebx +; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: mull %ebp +; WIN32-NEXT: movl %edx, %ebp +; WIN32-NEXT: movl %eax, %ecx +; WIN32-NEXT: addl %ebx, %ecx +; WIN32-NEXT: adcl $0, %ebp +; WIN32-NEXT: movl %edi, %eax +; WIN32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; WIN32-NEXT: movl %edx, %ebx +; WIN32-NEXT: movl %eax, %edi +; WIN32-NEXT: addl %ecx, %edi +; WIN32-NEXT: adcl %ebp, %ebx +; WIN32-NEXT: setb %cl +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; WIN32-NEXT: addl %ebx, %eax +; WIN32-NEXT: movzbl %cl, %ecx +; WIN32-NEXT: adcl %ecx, %edx +; WIN32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; WIN32-NEXT: adcl %esi, %edx +; WIN32-NEXT: movl %edi, %ecx +; WIN32-NEXT: sarl $31, %ecx +; WIN32-NEXT: xorl %ecx, %edx +; WIN32-NEXT: xorl %eax, %ecx +; WIN32-NEXT: orl %edx, %ecx +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: movl %edi, 4(%eax) +; WIN32-NEXT: movl (%esp), %ecx # 4-byte Reload +; WIN32-NEXT: movl %ecx, (%eax) +; WIN32-NEXT: setne %al +; WIN32-NEXT: addl $12, %esp ; WIN32-NEXT: popl %esi ; WIN32-NEXT: popl %edi ; WIN32-NEXT: popl %ebx +; WIN32-NEXT: popl %ebp ; WIN32-NEXT: retl %v2 = load i64, i64* %ptr2 %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)