diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1893,19 +1893,16 @@
     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
   }
 
-  // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
-  // handle type legalization for these operations here.
-  //
-  // FIXME: We really should do custom legalization for addition and
-  // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
-  // than generic legalization for 64-bit multiplication-with-overflow, though.
   for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
+    setOperationAction(ISD::SADDO, VT, Custom);
+    setOperationAction(ISD::SSUBO, VT, Custom);
+
+    // Only custom-lower other 64-bit operations on 64-bit because we don't
+    // handle type legalization for these operations here.
     if (VT == MVT::i64 && !Subtarget.is64Bit())
       continue;
     // Add/Sub/Mul with overflow operations are custom lowered.
-    setOperationAction(ISD::SADDO, VT, Custom);
     setOperationAction(ISD::UADDO, VT, Custom);
-    setOperationAction(ISD::SSUBO, VT, Custom);
     setOperationAction(ISD::USUBO, VT, Custom);
     setOperationAction(ISD::SMULO, VT, Custom);
     setOperationAction(ISD::UMULO, VT, Custom);
@@ -1916,6 +1913,11 @@
     setOperationAction(ISD::SETCCCARRY, VT, Custom);
   }
 
+  if (Subtarget.is64Bit()) {
+    setOperationAction(ISD::SADDO, MVT::i128, Custom);
+    setOperationAction(ISD::SSUBO, MVT::i128, Custom);
+  }
+
   if (!Subtarget.is64Bit()) {
     // These libcalls are not available in 32-bit.
     setLibcallName(RTLIB::SHL_I128, nullptr);
@@ -29882,6 +29884,33 @@
     Results.push_back(Chain);
     return;
   }
+  case ISD::SADDO:
+  case ISD::SSUBO: {
+    EVT VT = N->getValueType(0);
+    EVT NVT = getTypeToTransformTo(*DAG.getContext(), VT);
+    SDValue LHS = N->getOperand(0);
+    SDValue RHS = N->getOperand(1);
+    SDLoc dl(N);
+    SDValue LHSLo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, LHS,
+                                DAG.getIntPtrConstant(0, dl));
+    SDValue LHSHi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, LHS,
+                                DAG.getIntPtrConstant(1, dl));
+    SDValue RHSLo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, RHS,
+                                DAG.getIntPtrConstant(0, dl));
+    SDValue RHSHi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, RHS,
+                                DAG.getIntPtrConstant(1, dl));
+    SDVTList VTs = DAG.getVTList(NVT, MVT::i32);
+    unsigned LoOpc = N->getOpcode() == ISD::SADDO ? X86ISD::ADD : X86ISD::SUB;
+    unsigned HiOpc = N->getOpcode() == ISD::SADDO ? X86ISD::ADC : X86ISD::SBB;
+    SDValue Lo = DAG.getNode(LoOpc, dl, VTs, LHSLo, RHSLo);
+    SDValue Hi = DAG.getNode(HiOpc, dl, VTs, LHSHi, RHSHi, Lo.getValue(1));
+    SDValue Res = DAG.getNode(ISD::BUILD_PAIR, dl, VT, Lo, Hi);
+    SDValue Setcc = getSETCC(X86::COND_O, Hi.getValue(1), dl, DAG);
+    Setcc = DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(1), Setcc);
+    Results.push_back(Res);
+    Results.push_back(Setcc);
+    return;
+  }
   case ISD::CTPOP: {
     assert(N->getValueType(0) == MVT::i64 && "Unexpected VT!");
     // Use a v2i64 if possible.
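For reference, a minimal IR reproducer (hypothetical; not part of this patch) that exercises the new expansion above. With the SADDO/SSUBO cases custom-lowered for every type, `llc -mtriple=i686--` can legalize the i64 overflow check itself: the value is split into halves joined by addl/adcl, and the overflow bit is read off the high half with a single seto instead of comparing sign bits.

; Hypothetical reproducer; the function name is illustrative only.
define { i64, i1 } @saddo_i64(i64 %a, i64 %b) {
  %t = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %a, i64 %b)
  ret { i64, i1 } %t
}
declare { i64, i1 } @llvm.sadd.with.overflow.i64(i64, i64)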
diff --git a/llvm/test/CodeGen/X86/known-bits.ll b/llvm/test/CodeGen/X86/known-bits.ll
--- a/llvm/test/CodeGen/X86/known-bits.ll
+++ b/llvm/test/CodeGen/X86/known-bits.ll
@@ -139,26 +139,19 @@
 define {i32, i1} @knownbits_uaddo_saddo(i64 %a0, i64 %a1) nounwind {
 ; X32-LABEL: knownbits_uaddo_saddo:
 ; X32: # %bb.0:
-; X32-NEXT: pushl %ebx
+; X32-NEXT: pushl %esi
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT: movl %ecx, %edx
 ; X32-NEXT: addl %eax, %edx
-; X32-NEXT: setb %bl
-; X32-NEXT: testl %eax, %eax
-; X32-NEXT: setns %al
-; X32-NEXT: testl %ecx, %ecx
-; X32-NEXT: setns %cl
-; X32-NEXT: cmpb %al, %cl
-; X32-NEXT: sete %al
-; X32-NEXT: testl %edx, %edx
-; X32-NEXT: setns %dl
-; X32-NEXT: cmpb %dl, %cl
-; X32-NEXT: setne %dl
-; X32-NEXT: andb %al, %dl
-; X32-NEXT: orb %bl, %dl
+; X32-NEXT: setb %dh
+; X32-NEXT: xorl %esi, %esi
+; X32-NEXT: addl $0, %esi
+; X32-NEXT: adcl %eax, %ecx
+; X32-NEXT: seto %dl
+; X32-NEXT: orb %dh, %dl
 ; X32-NEXT: xorl %eax, %eax
-; X32-NEXT: popl %ebx
+; X32-NEXT: popl %esi
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: knownbits_uaddo_saddo:
@@ -190,22 +183,18 @@
 define {i32, i1} @knownbits_usubo_ssubo(i64 %a0, i64 %a1) nounwind {
 ; X32-LABEL: knownbits_usubo_ssubo:
 ; X32: # %bb.0:
+; X32-NEXT: pushl %esi
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT: cmpl %eax, %ecx
 ; X32-NEXT: setb %dh
-; X32-NEXT: setns %dl
-; X32-NEXT: testl %ecx, %ecx
-; X32-NEXT: setns %cl
-; X32-NEXT: cmpb %dl, %cl
-; X32-NEXT: setne %ch
-; X32-NEXT: testl %eax, %eax
-; X32-NEXT: setns %al
-; X32-NEXT: cmpb %al, %cl
-; X32-NEXT: setne %dl
-; X32-NEXT: andb %ch, %dl
+; X32-NEXT: xorl %esi, %esi
+; X32-NEXT: negl %esi
+; X32-NEXT: sbbl %eax, %ecx
+; X32-NEXT: seto %dl
 ; X32-NEXT: orb %dh, %dl
 ; X32-NEXT: xorl %eax, %eax
+; X32-NEXT: popl %esi
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: knownbits_usubo_ssubo:
diff --git a/llvm/test/CodeGen/X86/sadd_sat.ll b/llvm/test/CodeGen/X86/sadd_sat.ll
--- a/llvm/test/CodeGen/X86/sadd_sat.ll
+++ b/llvm/test/CodeGen/X86/sadd_sat.ll
@@ -42,38 +42,25 @@
 define i64 @func2(i64 %x, i64 %y) nounwind {
 ; X86-LABEL: func2:
 ; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
 ; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: addl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %ebx, %ebp
-; X86-NEXT: adcl %esi, %ebp
-; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: adcl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: seto %bl
+; X86-NEXT: movl %esi, %eax
 ; X86-NEXT: sarl $31, %eax
-; X86-NEXT: xorl %ecx, %ecx
-; X86-NEXT: testl %ebp, %ebp
-; X86-NEXT: setns %cl
-; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
-; X86-NEXT: testl %ebx, %ebx
-; X86-NEXT: setns %bl
-; X86-NEXT: cmpb %cl, %bl
-; X86-NEXT: setne %cl
+; X86-NEXT: testb %bl, %bl
+; X86-NEXT: cmovel %ecx, %eax
+; X86-NEXT: xorl %edx, %edx
 ; X86-NEXT: testl %esi, %esi
-; X86-NEXT: setns %ch
-; X86-NEXT: cmpb %ch, %bl
-; X86-NEXT: sete %ch
-; X86-NEXT: testb %cl, %ch
-; X86-NEXT: cmovel %ebp, %edx
-; X86-NEXT: cmovel %edi, %eax
+; X86-NEXT: setns %dl
+; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
+; X86-NEXT: testb %bl, %bl
+; X86-NEXT: cmovel %esi, %edx
 ; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
 ; X86-NEXT: popl %ebx
-; X86-NEXT: popl %ebp
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: func2:
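The func2 diff above is the i64 saturating-add test; its body is presumably a single `llvm.sadd.sat.i64` call along these lines (a sketch; the real definition lives in sadd_sat.ll). Saturating add is expanded through the same SADDO node, which is why the X86 code now collapses to add/adc, one seto, and two cmoves.

; Assumed shape of the function under test; see sadd_sat.ll for the original.
define i64 @func2(i64 %x, i64 %y) nounwind {
  %tmp = call i64 @llvm.sadd.sat.i64(i64 %x, i64 %y)
  ret i64 %tmp
}
declare i64 @llvm.sadd.sat.i64(i64, i64)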
diff --git a/llvm/test/CodeGen/X86/sadd_sat_plus.ll b/llvm/test/CodeGen/X86/sadd_sat_plus.ll
--- a/llvm/test/CodeGen/X86/sadd_sat_plus.ll
+++ b/llvm/test/CodeGen/X86/sadd_sat_plus.ll
@@ -44,38 +44,25 @@
 define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
 ; X86-LABEL: func64:
 ; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
 ; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: addl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %ebx, %ebp
-; X86-NEXT: adcl %esi, %ebp
-; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: adcl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: seto %bl
+; X86-NEXT: movl %esi, %eax
 ; X86-NEXT: sarl $31, %eax
-; X86-NEXT: xorl %ecx, %ecx
-; X86-NEXT: testl %ebp, %ebp
-; X86-NEXT: setns %cl
-; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
-; X86-NEXT: testl %ebx, %ebx
-; X86-NEXT: setns %bl
-; X86-NEXT: cmpb %cl, %bl
-; X86-NEXT: setne %cl
+; X86-NEXT: testb %bl, %bl
+; X86-NEXT: cmovel %ecx, %eax
+; X86-NEXT: xorl %edx, %edx
 ; X86-NEXT: testl %esi, %esi
-; X86-NEXT: setns %ch
-; X86-NEXT: cmpb %ch, %bl
-; X86-NEXT: sete %ch
-; X86-NEXT: testb %cl, %ch
-; X86-NEXT: cmovel %ebp, %edx
-; X86-NEXT: cmovel %edi, %eax
+; X86-NEXT: setns %dl
+; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
+; X86-NEXT: testb %bl, %bl
+; X86-NEXT: cmovel %esi, %edx
 ; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
 ; X86-NEXT: popl %ebx
-; X86-NEXT: popl %ebp
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: func64:
diff --git a/llvm/test/CodeGen/X86/sadd_sat_vec.ll b/llvm/test/CodeGen/X86/sadd_sat_vec.ll
--- a/llvm/test/CodeGen/X86/sadd_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/sadd_sat_vec.ll
@@ -1940,124 +1940,78 @@
 define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
 ; SSE-LABEL: v2i128:
 ; SSE: # %bb.0:
-; SSE-NEXT: pushq %r15
-; SSE-NEXT: pushq %r14
-; SSE-NEXT: pushq %r13
-; SSE-NEXT: pushq %r12
 ; SSE-NEXT: pushq %rbx
 ; SSE-NEXT: movq %rdi, %rax
-; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r11
-; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r14
 ; SSE-NEXT: addq {{[0-9]+}}(%rsp), %rcx
-; SSE-NEXT: movq %r8, %r13
-; SSE-NEXT: adcq %r14, %r13
-; SSE-NEXT: movq %r13, %r10
-; SSE-NEXT: sarq $63, %r10
-; SSE-NEXT: xorl %edi, %edi
-; SSE-NEXT: testq %r13, %r13
-; SSE-NEXT: setns %dil
-; SSE-NEXT: movabsq $9223372036854775807, %r12 # imm = 0x7FFFFFFFFFFFFFFF
-; SSE-NEXT: leaq (%rdi,%r12), %r15
+; SSE-NEXT: adcq {{[0-9]+}}(%rsp), %r8
+; SSE-NEXT: seto %r10b
+; SSE-NEXT: movq %r8, %rbx
+; SSE-NEXT: sarq $63, %rbx
+; SSE-NEXT: testb %r10b, %r10b
+; SSE-NEXT: cmoveq %rcx, %rbx
+; SSE-NEXT: xorl %ecx, %ecx
 ; SSE-NEXT: testq %r8, %r8
-; SSE-NEXT: setns %r8b
-; SSE-NEXT: cmpb %dil, %r8b
-; SSE-NEXT: setne %dil
-; SSE-NEXT: testq %r14, %r14
-; SSE-NEXT: setns %bl
-; SSE-NEXT: cmpb %bl, %r8b
-; SSE-NEXT: sete %bl
-; SSE-NEXT: testb %dil, %bl
-; SSE-NEXT: cmoveq %r13, %r15
-; SSE-NEXT: cmoveq %rcx, %r10
+; SSE-NEXT: setns %cl
+; SSE-NEXT: movabsq $9223372036854775807, %r11 # imm = 0x7FFFFFFFFFFFFFFF
+; SSE-NEXT: addq %r11, %rcx
+; SSE-NEXT: testb %r10b, %r10b
+; SSE-NEXT: cmoveq %r8, %rcx
 ; SSE-NEXT: addq %r9, %rsi
+; SSE-NEXT: adcq {{[0-9]+}}(%rsp), %rdx
+; SSE-NEXT: seto %r8b
 ; SSE-NEXT: movq %rdx, %rdi
-; SSE-NEXT: adcq %r11, %rdi
-; SSE-NEXT: setns %bl
-; SSE-NEXT: movzbl %bl, %ebx
-; SSE-NEXT: addq %rbx, %r12
-; SSE-NEXT: movq %rdi, %rcx
-; SSE-NEXT: sarq $63, %rcx
-; SSE-NEXT: testq %r11, %r11
-; SSE-NEXT: setns %r8b
+; SSE-NEXT: sarq $63, %rdi
+; SSE-NEXT: testb %r8b, %r8b
+; SSE-NEXT: cmoveq %rsi, %rdi
+; SSE-NEXT: xorl %esi, %esi
 ; SSE-NEXT: testq %rdx, %rdx
-; SSE-NEXT: setns %dl
-; SSE-NEXT: cmpb %r8b, %dl
-; SSE-NEXT: sete %r8b
-; SSE-NEXT: cmpb %bl, %dl
-; SSE-NEXT: setne %dl
-; SSE-NEXT: testb %dl, %r8b
-; SSE-NEXT: cmoveq %rsi, %rcx
-; SSE-NEXT: cmoveq %rdi, %r12
-; SSE-NEXT: movq %r15, 24(%rax)
-; SSE-NEXT: movq %r10, 16(%rax)
-; SSE-NEXT: movq %r12, 8(%rax)
-; SSE-NEXT: movq %rcx, (%rax)
+; SSE-NEXT: setns %sil
+; SSE-NEXT: addq %r11, %rsi
+; SSE-NEXT: testb %r8b, %r8b
+; SSE-NEXT: cmoveq %rdx, %rsi
+; SSE-NEXT: movq %rbx, 16(%rax)
+; SSE-NEXT: movq %rdi, (%rax)
+; SSE-NEXT: movq %rcx, 24(%rax)
+; SSE-NEXT: movq %rsi, 8(%rax)
 ; SSE-NEXT: popq %rbx
-; SSE-NEXT: popq %r12
-; SSE-NEXT: popq %r13
-; SSE-NEXT: popq %r14
-; SSE-NEXT: popq %r15
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: v2i128:
 ; AVX: # %bb.0:
-; AVX-NEXT: pushq %r15
-; AVX-NEXT: pushq %r14
-; AVX-NEXT: pushq %r13
-; AVX-NEXT: pushq %r12
 ; AVX-NEXT: pushq %rbx
 ; AVX-NEXT: movq %rdi, %rax
-; AVX-NEXT: movq {{[0-9]+}}(%rsp), %r11
-; AVX-NEXT: movq {{[0-9]+}}(%rsp), %r14
 ; AVX-NEXT: addq {{[0-9]+}}(%rsp), %rcx
-; AVX-NEXT: movq %r8, %r13
-; AVX-NEXT: adcq %r14, %r13
-; AVX-NEXT: movq %r13, %r10
-; AVX-NEXT: sarq $63, %r10
-; AVX-NEXT: xorl %edi, %edi
-; AVX-NEXT: testq %r13, %r13
-; AVX-NEXT: setns %dil
-; AVX-NEXT: movabsq $9223372036854775807, %r12 # imm = 0x7FFFFFFFFFFFFFFF
-; AVX-NEXT: leaq (%rdi,%r12), %r15
+; AVX-NEXT: adcq {{[0-9]+}}(%rsp), %r8
+; AVX-NEXT: seto %r10b
+; AVX-NEXT: movq %r8, %rbx
+; AVX-NEXT: sarq $63, %rbx
+; AVX-NEXT: testb %r10b, %r10b
+; AVX-NEXT: cmoveq %rcx, %rbx
+; AVX-NEXT: xorl %ecx, %ecx
 ; AVX-NEXT: testq %r8, %r8
-; AVX-NEXT: setns %r8b
-; AVX-NEXT: cmpb %dil, %r8b
-; AVX-NEXT: setne %dil
-; AVX-NEXT: testq %r14, %r14
-; AVX-NEXT: setns %bl
-; AVX-NEXT: cmpb %bl, %r8b
-; AVX-NEXT: sete %bl
-; AVX-NEXT: testb %dil, %bl
-; AVX-NEXT: cmoveq %r13, %r15
-; AVX-NEXT: cmoveq %rcx, %r10
+; AVX-NEXT: setns %cl
+; AVX-NEXT: movabsq $9223372036854775807, %r11 # imm = 0x7FFFFFFFFFFFFFFF
+; AVX-NEXT: addq %r11, %rcx
+; AVX-NEXT: testb %r10b, %r10b
+; AVX-NEXT: cmoveq %r8, %rcx
 ; AVX-NEXT: addq %r9, %rsi
+; AVX-NEXT: adcq {{[0-9]+}}(%rsp), %rdx
+; AVX-NEXT: seto %r8b
 ; AVX-NEXT: movq %rdx, %rdi
-; AVX-NEXT: adcq %r11, %rdi
-; AVX-NEXT: setns %bl
-; AVX-NEXT: movzbl %bl, %ebx
-; AVX-NEXT: addq %rbx, %r12
-; AVX-NEXT: movq %rdi, %rcx
-; AVX-NEXT: sarq $63, %rcx
-; AVX-NEXT: testq %r11, %r11
-; AVX-NEXT: setns %r8b
+; AVX-NEXT: sarq $63, %rdi
+; AVX-NEXT: testb %r8b, %r8b
+; AVX-NEXT: cmoveq %rsi, %rdi
+; AVX-NEXT: xorl %esi, %esi
 ; AVX-NEXT: testq %rdx, %rdx
-; AVX-NEXT: setns %dl
-; AVX-NEXT: cmpb %r8b, %dl
-; AVX-NEXT: sete %r8b
-; AVX-NEXT: cmpb %bl, %dl
-; AVX-NEXT: setne %dl
-; AVX-NEXT: testb %dl, %r8b
-; AVX-NEXT: cmoveq %rsi, %rcx
-; AVX-NEXT: cmoveq %rdi, %r12
-; AVX-NEXT: movq %r15, 24(%rax)
-; AVX-NEXT: movq %r10, 16(%rax)
-; AVX-NEXT: movq %r12, 8(%rax)
-; AVX-NEXT: movq %rcx, (%rax)
+; AVX-NEXT: setns %sil
+; AVX-NEXT: addq %r11, %rsi
+; AVX-NEXT: testb %r8b, %r8b
+; AVX-NEXT: cmoveq %rdx, %rsi
+; AVX-NEXT: movq %rbx, 16(%rax)
+; AVX-NEXT: movq %rdi, (%rax)
+; AVX-NEXT: movq %rcx, 24(%rax)
+; AVX-NEXT: movq %rsi, 8(%rax)
 ; AVX-NEXT: popq %rbx
-; AVX-NEXT: popq %r12
-; AVX-NEXT: popq %r13
-; AVX-NEXT: popq %r14
-; AVX-NEXT: popq %r15
 ; AVX-NEXT: retq
   %z = call <2 x i128> @llvm.sadd.sat.v2i128(<2 x i128> %x, <2 x i128> %y)
   ret <2 x i128> %z
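The v2i128 test above goes through the new 64-bit-only i128 hook added in X86ISelLowering.cpp. A scalar form (hypothetical reproducer, not in the patch) isolates that path; on x86-64 it should now lower to addq/adcq plus one seto.

; Hypothetical reproducer; the function name is illustrative only.
define { i128, i1 } @saddo_i128(i128 %a, i128 %b) {
  %t = call { i128, i1 } @llvm.sadd.with.overflow.i128(i128 %a, i128 %b)
  ret { i128, i1 } %t
}
declare { i128, i1 } @llvm.sadd.with.overflow.i128(i128, i128)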
diff --git a/llvm/test/CodeGen/X86/ssub_sat.ll b/llvm/test/CodeGen/X86/ssub_sat.ll
--- a/llvm/test/CodeGen/X86/ssub_sat.ll
+++ b/llvm/test/CodeGen/X86/ssub_sat.ll
@@ -38,38 +38,25 @@
 define i64 @func2(i64 %x, i64 %y) nounwind {
 ; X86-LABEL: func2:
 ; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
 ; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %ebx, %ebp
-; X86-NEXT: sbbl %esi, %ebp
-; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: seto %bl
+; X86-NEXT: movl %esi, %eax
 ; X86-NEXT: sarl $31, %eax
-; X86-NEXT: xorl %ecx, %ecx
-; X86-NEXT: testl %ebp, %ebp
-; X86-NEXT: setns %cl
-; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
-; X86-NEXT: testl %ebx, %ebx
-; X86-NEXT: setns %bl
-; X86-NEXT: cmpb %cl, %bl
-; X86-NEXT: setne %cl
+; X86-NEXT: testb %bl, %bl
+; X86-NEXT: cmovel %ecx, %eax
+; X86-NEXT: xorl %edx, %edx
 ; X86-NEXT: testl %esi, %esi
-; X86-NEXT: setns %ch
-; X86-NEXT: cmpb %ch, %bl
-; X86-NEXT: setne %ch
-; X86-NEXT: testb %cl, %ch
-; X86-NEXT: cmovel %ebp, %edx
-; X86-NEXT: cmovel %edi, %eax
+; X86-NEXT: setns %dl
+; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
+; X86-NEXT: testb %bl, %bl
+; X86-NEXT: cmovel %esi, %edx
 ; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
 ; X86-NEXT: popl %ebx
-; X86-NEXT: popl %ebp
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: func2:
diff --git a/llvm/test/CodeGen/X86/ssub_sat_plus.ll b/llvm/test/CodeGen/X86/ssub_sat_plus.ll
--- a/llvm/test/CodeGen/X86/ssub_sat_plus.ll
+++ b/llvm/test/CodeGen/X86/ssub_sat_plus.ll
@@ -40,38 +40,25 @@
 define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
 ; X86-LABEL: func64:
 ; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
 ; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %ebx, %ebp
-; X86-NEXT: sbbl %esi, %ebp
-; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: seto %bl
+; X86-NEXT: movl %esi, %eax
 ; X86-NEXT: sarl $31, %eax
-; X86-NEXT: xorl %ecx, %ecx
-; X86-NEXT: testl %ebp, %ebp
-; X86-NEXT: setns %cl
-; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
-; X86-NEXT: testl %ebx, %ebx
-; X86-NEXT: setns %bl
-; X86-NEXT: cmpb %cl, %bl
-; X86-NEXT: setne %cl
+; X86-NEXT: testb %bl, %bl
+; X86-NEXT: cmovel %ecx, %eax
+; X86-NEXT: xorl %edx, %edx
 ; X86-NEXT: testl %esi, %esi
-; X86-NEXT: setns %ch
-; X86-NEXT: cmpb %ch, %bl
-; X86-NEXT: setne %ch
-; X86-NEXT: testb %cl, %ch
-; X86-NEXT: cmovel %ebp, %edx
-; X86-NEXT: cmovel %edi, %eax
+; X86-NEXT: setns %dl
+; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
+; X86-NEXT: testb %bl, %bl
+; X86-NEXT: cmovel %esi, %edx
 ; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
 ; X86-NEXT: popl %ebx
-; X86-NEXT: popl %ebp
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: func64:
diff --git a/llvm/test/CodeGen/X86/ssub_sat_vec.ll b/llvm/test/CodeGen/X86/ssub_sat_vec.ll
--- a/llvm/test/CodeGen/X86/ssub_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/ssub_sat_vec.ll
@@ -2145,124 +2145,78 @@
 define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
 ; SSE-LABEL: v2i128:
 ; SSE: # %bb.0:
-; SSE-NEXT: pushq %r15
-; SSE-NEXT: pushq %r14
-; SSE-NEXT: pushq %r13
-; SSE-NEXT: pushq %r12
 ; SSE-NEXT: pushq %rbx
 ; SSE-NEXT: movq %rdi, %rax
-; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r11
-; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r14
 ; SSE-NEXT: subq {{[0-9]+}}(%rsp), %rcx
-; SSE-NEXT: movq %r8, %r13
-; SSE-NEXT: sbbq %r14, %r13
-; SSE-NEXT: movq %r13, %r10
-; SSE-NEXT: sarq $63, %r10
-; SSE-NEXT: xorl %edi, %edi
-; SSE-NEXT: testq %r13, %r13
-; SSE-NEXT: setns %dil
-; SSE-NEXT: movabsq $9223372036854775807, %r12 # imm = 0x7FFFFFFFFFFFFFFF
-; SSE-NEXT: leaq (%rdi,%r12), %r15
+; SSE-NEXT: sbbq {{[0-9]+}}(%rsp), %r8
+; SSE-NEXT: seto %r10b
+; SSE-NEXT: movq %r8, %rbx
+; SSE-NEXT: sarq $63, %rbx
+; SSE-NEXT: testb %r10b, %r10b
+; SSE-NEXT: cmoveq %rcx, %rbx
+; SSE-NEXT: xorl %ecx, %ecx
 ; SSE-NEXT: testq %r8, %r8
-; SSE-NEXT: setns %r8b
-; SSE-NEXT: cmpb %dil, %r8b
-; SSE-NEXT: setne %dil
-; SSE-NEXT: testq %r14, %r14
-; SSE-NEXT: setns %bl
-; SSE-NEXT: cmpb %bl, %r8b
-; SSE-NEXT: setne %bl
-; SSE-NEXT: testb %dil, %bl
-; SSE-NEXT: cmoveq %r13, %r15
-; SSE-NEXT: cmoveq %rcx, %r10
+; SSE-NEXT: setns %cl
+; SSE-NEXT: movabsq $9223372036854775807, %r11 # imm = 0x7FFFFFFFFFFFFFFF
+; SSE-NEXT: addq %r11, %rcx
+; SSE-NEXT: testb %r10b, %r10b
+; SSE-NEXT: cmoveq %r8, %rcx
 ; SSE-NEXT: subq %r9, %rsi
+; SSE-NEXT: sbbq {{[0-9]+}}(%rsp), %rdx
+; SSE-NEXT: seto %r8b
 ; SSE-NEXT: movq %rdx, %rdi
-; SSE-NEXT: sbbq %r11, %rdi
-; SSE-NEXT: setns %bl
-; SSE-NEXT: movzbl %bl, %ebx
-; SSE-NEXT: addq %rbx, %r12
-; SSE-NEXT: movq %rdi, %rcx
-; SSE-NEXT: sarq $63, %rcx
-; SSE-NEXT: testq %r11, %r11
-; SSE-NEXT: setns %r8b
+; SSE-NEXT: sarq $63, %rdi
+; SSE-NEXT: testb %r8b, %r8b
+; SSE-NEXT: cmoveq %rsi, %rdi
+; SSE-NEXT: xorl %esi, %esi
 ; SSE-NEXT: testq %rdx, %rdx
-; SSE-NEXT: setns %dl
-; SSE-NEXT: cmpb %r8b, %dl
-; SSE-NEXT: setne %r8b
-; SSE-NEXT: cmpb %bl, %dl
-; SSE-NEXT: setne %dl
-; SSE-NEXT: testb %dl, %r8b
-; SSE-NEXT: cmoveq %rsi, %rcx
-; SSE-NEXT: cmoveq %rdi, %r12
-; SSE-NEXT: movq %r15, 24(%rax)
-; SSE-NEXT: movq %r10, 16(%rax)
-; SSE-NEXT: movq %r12, 8(%rax)
-; SSE-NEXT: movq %rcx, (%rax)
+; SSE-NEXT: setns %sil
+; SSE-NEXT: addq %r11, %rsi
+; SSE-NEXT: testb %r8b, %r8b
+; SSE-NEXT: cmoveq %rdx, %rsi
+; SSE-NEXT: movq %rbx, 16(%rax)
+; SSE-NEXT: movq %rdi, (%rax)
+; SSE-NEXT: movq %rcx, 24(%rax)
+; SSE-NEXT: movq %rsi, 8(%rax)
 ; SSE-NEXT: popq %rbx
-; SSE-NEXT: popq %r12
-; SSE-NEXT: popq %r13
-; SSE-NEXT: popq %r14
-; SSE-NEXT: popq %r15
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: v2i128:
 ; AVX: # %bb.0:
-; AVX-NEXT: pushq %r15
-; AVX-NEXT: pushq %r14
-; AVX-NEXT: pushq %r13
-; AVX-NEXT: pushq %r12
 ; AVX-NEXT: pushq %rbx
 ; AVX-NEXT: movq %rdi, %rax
-; AVX-NEXT: movq {{[0-9]+}}(%rsp), %r11
-; AVX-NEXT: movq {{[0-9]+}}(%rsp), %r14
 ; AVX-NEXT: subq {{[0-9]+}}(%rsp), %rcx
-; AVX-NEXT: movq %r8, %r13
-; AVX-NEXT: sbbq %r14, %r13
-; AVX-NEXT: movq %r13, %r10
-; AVX-NEXT: sarq $63, %r10
-; AVX-NEXT: xorl %edi, %edi
-; AVX-NEXT: testq %r13, %r13
-; AVX-NEXT: setns %dil
-; AVX-NEXT: movabsq $9223372036854775807, %r12 # imm = 0x7FFFFFFFFFFFFFFF
-; AVX-NEXT: leaq (%rdi,%r12), %r15
+; AVX-NEXT: sbbq {{[0-9]+}}(%rsp), %r8
+; AVX-NEXT: seto %r10b
+; AVX-NEXT: movq %r8, %rbx
+; AVX-NEXT: sarq $63, %rbx
+; AVX-NEXT: testb %r10b, %r10b
+; AVX-NEXT: cmoveq %rcx, %rbx
+; AVX-NEXT: xorl %ecx, %ecx
 ; AVX-NEXT: testq %r8, %r8
-; AVX-NEXT: setns %r8b
-; AVX-NEXT: cmpb %dil, %r8b
-; AVX-NEXT: setne %dil
-; AVX-NEXT: testq %r14, %r14
-; AVX-NEXT: setns %bl
-; AVX-NEXT: cmpb %bl, %r8b
-; AVX-NEXT: setne %bl
-; AVX-NEXT: testb %dil, %bl
-; AVX-NEXT: cmoveq %r13, %r15
-; AVX-NEXT: cmoveq %rcx, %r10
+; AVX-NEXT: setns %cl
+; AVX-NEXT: movabsq $9223372036854775807, %r11 # imm = 0x7FFFFFFFFFFFFFFF
+; AVX-NEXT: addq %r11, %rcx
+; AVX-NEXT: testb %r10b, %r10b
+; AVX-NEXT: cmoveq %r8, %rcx
 ; AVX-NEXT: subq %r9, %rsi
+; AVX-NEXT: sbbq {{[0-9]+}}(%rsp), %rdx
+; AVX-NEXT: seto %r8b
 ; AVX-NEXT: movq %rdx, %rdi
-; AVX-NEXT: sbbq %r11, %rdi
-; AVX-NEXT: setns %bl
-; AVX-NEXT: movzbl %bl, %ebx
-; AVX-NEXT: addq %rbx, %r12
-; AVX-NEXT: movq %rdi, %rcx
-; AVX-NEXT: sarq $63, %rcx
-; AVX-NEXT: testq %r11, %r11
-; AVX-NEXT: setns %r8b
+; AVX-NEXT: sarq $63, %rdi
+; AVX-NEXT: testb %r8b, %r8b
+; AVX-NEXT: cmoveq %rsi, %rdi
+; AVX-NEXT: xorl %esi, %esi
 ; AVX-NEXT: testq %rdx, %rdx
-; AVX-NEXT: setns %dl
-; AVX-NEXT: cmpb %r8b, %dl
-; AVX-NEXT: setne %r8b
-; AVX-NEXT: cmpb %bl, %dl
-; AVX-NEXT: setne %dl
-; AVX-NEXT: testb %dl, %r8b
-; AVX-NEXT: cmoveq %rsi, %rcx
-; AVX-NEXT: cmoveq %rdi, %r12
-; AVX-NEXT: movq %r15, 24(%rax)
-; AVX-NEXT: movq %r10, 16(%rax)
-; AVX-NEXT: movq %r12, 8(%rax)
-; AVX-NEXT: movq %rcx, (%rax)
+; AVX-NEXT: setns %sil
+; AVX-NEXT: addq %r11, %rsi
+; AVX-NEXT: testb %r8b, %r8b
+; AVX-NEXT: cmoveq %rdx, %rsi
+; AVX-NEXT: movq %rbx, 16(%rax)
+; AVX-NEXT: movq %rdi, (%rax)
+; AVX-NEXT: movq %rcx, 24(%rax)
+; AVX-NEXT: movq %rsi, 8(%rax)
 ; AVX-NEXT: popq %rbx
-; AVX-NEXT: popq %r12
-; AVX-NEXT: popq %r13
-; AVX-NEXT: popq %r14
-; AVX-NEXT: popq %r15
 ; AVX-NEXT: retq
   %z = call <2 x i128> @llvm.ssub.sat.v2i128(<2 x i128> %x, <2 x i128> %y)
   ret <2 x i128> %z
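The subtraction side is symmetric: the expansion picks X86ISD::SUB/SBB instead of ADD/ADC, so a hypothetical scalar reproducer like the following should now lower to subq/sbbq plus one seto on x86-64.

; Hypothetical reproducer; the function name is illustrative only.
define { i128, i1 } @ssubo_i128(i128 %a, i128 %b) {
  %t = call { i128, i1 } @llvm.ssub.with.overflow.i128(i128 %a, i128 %b)
  ret { i128, i1 } %t
}
declare { i128, i1 } @llvm.ssub.with.overflow.i128(i128, i128)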
diff --git a/llvm/test/CodeGen/X86/vec_saddo.ll b/llvm/test/CodeGen/X86/vec_saddo.ll
--- a/llvm/test/CodeGen/X86/vec_saddo.ll
+++ b/llvm/test/CodeGen/X86/vec_saddo.ll
@@ -1145,275 +1145,131 @@
 define <2 x i32> @saddo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2) nounwind {
 ; SSE2-LABEL: saddo_v2i128:
 ; SSE2: # %bb.0:
-; SSE2-NEXT: pushq %rbp
-; SSE2-NEXT: pushq %rbx
-; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %r11
 ; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %r10
-; SSE2-NEXT: testq %r9, %r9
-; SSE2-NEXT: setns %al
-; SSE2-NEXT: testq %rsi, %rsi
-; SSE2-NEXT: setns %bl
-; SSE2-NEXT: cmpb %al, %bl
-; SSE2-NEXT: sete %bpl
 ; SSE2-NEXT: addq %r8, %rdi
 ; SSE2-NEXT: adcq %r9, %rsi
-; SSE2-NEXT: setns %al
-; SSE2-NEXT: cmpb %al, %bl
-; SSE2-NEXT: setne %al
-; SSE2-NEXT: andb %bpl, %al
+; SSE2-NEXT: seto %r8b
 ; SSE2-NEXT: addq {{[0-9]+}}(%rsp), %rdx
-; SSE2-NEXT: movq %rcx, %rbp
-; SSE2-NEXT: adcq %r10, %rbp
-; SSE2-NEXT: setns %bl
-; SSE2-NEXT: testq %rcx, %rcx
-; SSE2-NEXT: setns %cl
-; SSE2-NEXT: cmpb %bl, %cl
-; SSE2-NEXT: setne %r8b
-; SSE2-NEXT: testq %r10, %r10
-; SSE2-NEXT: setns %bl
-; SSE2-NEXT: cmpb %bl, %cl
-; SSE2-NEXT: sete %cl
-; SSE2-NEXT: andb %r8b, %cl
-; SSE2-NEXT: movzbl %cl, %ecx
-; SSE2-NEXT: negl %ecx
-; SSE2-NEXT: movd %ecx, %xmm1
+; SSE2-NEXT: adcq {{[0-9]+}}(%rsp), %rcx
+; SSE2-NEXT: seto %al
 ; SSE2-NEXT: movzbl %al, %eax
 ; SSE2-NEXT: negl %eax
+; SSE2-NEXT: movd %eax, %xmm1
+; SSE2-NEXT: movzbl %r8b, %eax
+; SSE2-NEXT: negl %eax
 ; SSE2-NEXT: movd %eax, %xmm0
 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT: movq %rdx, 16(%r11)
-; SSE2-NEXT: movq %rdi, (%r11)
-; SSE2-NEXT: movq %rbp, 24(%r11)
-; SSE2-NEXT: movq %rsi, 8(%r11)
-; SSE2-NEXT: popq %rbx
-; SSE2-NEXT: popq %rbp
+; SSE2-NEXT: movq %rdx, 16(%r10)
+; SSE2-NEXT: movq %rdi, (%r10)
+; SSE2-NEXT: movq %rcx, 24(%r10)
+; SSE2-NEXT: movq %rsi, 8(%r10)
 ; SSE2-NEXT: retq
 ;
 ; SSSE3-LABEL: saddo_v2i128:
 ; SSSE3: # %bb.0:
-; SSSE3-NEXT: pushq %rbp
-; SSSE3-NEXT: pushq %rbx
-; SSSE3-NEXT: movq {{[0-9]+}}(%rsp), %r11
 ; SSSE3-NEXT: movq {{[0-9]+}}(%rsp), %r10
-; SSSE3-NEXT: testq %r9, %r9
-; SSSE3-NEXT: setns %al
-; SSSE3-NEXT: testq %rsi, %rsi
-; SSSE3-NEXT: setns %bl
-; SSSE3-NEXT: cmpb %al, %bl
-; SSSE3-NEXT: sete %bpl
 ; SSSE3-NEXT: addq %r8, %rdi
 ; SSSE3-NEXT: adcq %r9, %rsi
-; SSSE3-NEXT: setns %al
-; SSSE3-NEXT: cmpb %al, %bl
-; SSSE3-NEXT: setne %al
-; SSSE3-NEXT: andb %bpl, %al
+; SSSE3-NEXT: seto %r8b
 ; SSSE3-NEXT: addq {{[0-9]+}}(%rsp), %rdx
-; SSSE3-NEXT: movq %rcx, %rbp
-; SSSE3-NEXT: adcq %r10, %rbp
-; SSSE3-NEXT: setns %bl
-; SSSE3-NEXT: testq %rcx, %rcx
-; SSSE3-NEXT: setns %cl
-; SSSE3-NEXT: cmpb %bl, %cl
-; SSSE3-NEXT: setne %r8b
-; SSSE3-NEXT: testq %r10, %r10
-; SSSE3-NEXT: setns %bl
-; SSSE3-NEXT: cmpb %bl, %cl
-; SSSE3-NEXT: sete %cl
-; SSSE3-NEXT: andb %r8b, %cl
-; SSSE3-NEXT: movzbl %cl, %ecx
-; SSSE3-NEXT: negl %ecx
-; SSSE3-NEXT: movd %ecx, %xmm1
+; SSSE3-NEXT: adcq {{[0-9]+}}(%rsp), %rcx
+; SSSE3-NEXT: seto %al
 ; SSSE3-NEXT: movzbl %al, %eax
 ; SSSE3-NEXT: negl %eax
+; SSSE3-NEXT: movd %eax, %xmm1
+; SSSE3-NEXT: movzbl %r8b, %eax
+; SSSE3-NEXT: negl %eax
 ; SSSE3-NEXT: movd %eax, %xmm0
 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSSE3-NEXT: movq %rdx, 16(%r11)
-; SSSE3-NEXT: movq %rdi, (%r11)
-; SSSE3-NEXT: movq %rbp, 24(%r11)
-; SSSE3-NEXT: movq %rsi, 8(%r11)
-; SSSE3-NEXT: popq %rbx
-; SSSE3-NEXT: popq %rbp
+; SSSE3-NEXT: movq %rdx, 16(%r10)
+; SSSE3-NEXT: movq %rdi, (%r10)
+; SSSE3-NEXT: movq %rcx, 24(%r10)
+; SSSE3-NEXT: movq %rsi, 8(%r10)
 ; SSSE3-NEXT: retq
 ;
 ; SSE41-LABEL: saddo_v2i128:
 ; SSE41: # %bb.0:
-; SSE41-NEXT: pushq %rbp
-; SSE41-NEXT: pushq %rbx
-; SSE41-NEXT: movq {{[0-9]+}}(%rsp), %r11
 ; SSE41-NEXT: movq {{[0-9]+}}(%rsp), %r10
-; SSE41-NEXT: testq %r9, %r9
-; SSE41-NEXT: setns %al
-; SSE41-NEXT: testq %rsi, %rsi
-; SSE41-NEXT: setns %bl
-; SSE41-NEXT: cmpb %al, %bl
-; SSE41-NEXT: sete %bpl
 ; SSE41-NEXT: addq %r8, %rdi
 ; SSE41-NEXT: adcq %r9, %rsi
-; SSE41-NEXT: setns %al
-; SSE41-NEXT: cmpb %al, %bl
-; SSE41-NEXT: setne %al
-; SSE41-NEXT: andb %bpl, %al
+; SSE41-NEXT: seto %r8b
 ; SSE41-NEXT: addq {{[0-9]+}}(%rsp), %rdx
-; SSE41-NEXT: movq %rcx, %rbp
-; SSE41-NEXT: adcq %r10, %rbp
-; SSE41-NEXT: setns %bl
-; SSE41-NEXT: testq %rcx, %rcx
-; SSE41-NEXT: setns %cl
-; SSE41-NEXT: cmpb %bl, %cl
-; SSE41-NEXT: setne %r8b
-; SSE41-NEXT: testq %r10, %r10
-; SSE41-NEXT: setns %bl
-; SSE41-NEXT: cmpb %bl, %cl
-; SSE41-NEXT: sete %cl
-; SSE41-NEXT: andb %r8b, %cl
-; SSE41-NEXT: movzbl %cl, %ecx
-; SSE41-NEXT: negl %ecx
-; SSE41-NEXT: movzbl %al, %eax
+; SSE41-NEXT: adcq {{[0-9]+}}(%rsp), %rcx
+; SSE41-NEXT: seto %al
+; SSE41-NEXT: movzbl %al, %r9d
+; SSE41-NEXT: negl %r9d
+; SSE41-NEXT: movzbl %r8b, %eax
 ; SSE41-NEXT: negl %eax
 ; SSE41-NEXT: movd %eax, %xmm0
-; SSE41-NEXT: pinsrd $1, %ecx, %xmm0
-; SSE41-NEXT: movq %rdx, 16(%r11)
-; SSE41-NEXT: movq %rdi, (%r11)
-; SSE41-NEXT: movq %rbp, 24(%r11)
-; SSE41-NEXT: movq %rsi, 8(%r11)
-; SSE41-NEXT: popq %rbx
-; SSE41-NEXT: popq %rbp
+; SSE41-NEXT: pinsrd $1, %r9d, %xmm0
+; SSE41-NEXT: movq %rdx, 16(%r10)
+; SSE41-NEXT: movq %rdi, (%r10)
+; SSE41-NEXT: movq %rcx, 24(%r10)
+; SSE41-NEXT: movq %rsi, 8(%r10)
 ; SSE41-NEXT: retq
 ;
 ; AVX1-LABEL: saddo_v2i128:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: pushq %rbp
-; AVX1-NEXT: pushq %rbx
-; AVX1-NEXT: movq {{[0-9]+}}(%rsp), %r11
 ; AVX1-NEXT: movq {{[0-9]+}}(%rsp), %r10
-; AVX1-NEXT: testq %r9, %r9
-; AVX1-NEXT: setns %al
-; AVX1-NEXT: testq %rsi, %rsi
-; AVX1-NEXT: setns %bl
-; AVX1-NEXT: cmpb %al, %bl
-; AVX1-NEXT: sete %bpl
 ; AVX1-NEXT: addq %r8, %rdi
 ; AVX1-NEXT: adcq %r9, %rsi
-; AVX1-NEXT: setns %al
-; AVX1-NEXT: cmpb %al, %bl
-; AVX1-NEXT: setne %al
-; AVX1-NEXT: andb %bpl, %al
+; AVX1-NEXT: seto %r8b
 ; AVX1-NEXT: addq {{[0-9]+}}(%rsp), %rdx
-; AVX1-NEXT: movq %rcx, %rbp
-; AVX1-NEXT: adcq %r10, %rbp
-; AVX1-NEXT: setns %bl
-; AVX1-NEXT: testq %rcx, %rcx
-; AVX1-NEXT: setns %cl
-; AVX1-NEXT: cmpb %bl, %cl
-; AVX1-NEXT: setne %r8b
-; AVX1-NEXT: testq %r10, %r10
-; AVX1-NEXT: setns %bl
-; AVX1-NEXT: cmpb %bl, %cl
-; AVX1-NEXT: sete %cl
-; AVX1-NEXT: andb %r8b, %cl
-; AVX1-NEXT: movzbl %cl, %ecx
-; AVX1-NEXT: negl %ecx
-; AVX1-NEXT: movzbl %al, %eax
+; AVX1-NEXT: adcq {{[0-9]+}}(%rsp), %rcx
+; AVX1-NEXT: seto %al
+; AVX1-NEXT: movzbl %al, %r9d
+; AVX1-NEXT: negl %r9d
+; AVX1-NEXT: movzbl %r8b, %eax
 ; AVX1-NEXT: negl %eax
 ; AVX1-NEXT: vmovd %eax, %xmm0
-; AVX1-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
-; AVX1-NEXT: movq %rdx, 16(%r11)
-; AVX1-NEXT: movq %rdi, (%r11)
-; AVX1-NEXT: movq %rbp, 24(%r11)
-; AVX1-NEXT: movq %rsi, 8(%r11)
-; AVX1-NEXT: popq %rbx
-; AVX1-NEXT: popq %rbp
+; AVX1-NEXT: vpinsrd $1, %r9d, %xmm0, %xmm0
+; AVX1-NEXT: movq %rdx, 16(%r10)
+; AVX1-NEXT: movq %rdi, (%r10)
+; AVX1-NEXT: movq %rcx, 24(%r10)
+; AVX1-NEXT: movq %rsi, 8(%r10)
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: saddo_v2i128:
 ; AVX2: # %bb.0:
-; AVX2-NEXT: pushq %rbp
-; AVX2-NEXT: pushq %rbx
-; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r11
 ; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r10
-; AVX2-NEXT: testq %r9, %r9
-; AVX2-NEXT: setns %al
-; AVX2-NEXT: testq %rsi, %rsi
-; AVX2-NEXT: setns %bl
-; AVX2-NEXT: cmpb %al, %bl
-; AVX2-NEXT: sete %bpl
 ; AVX2-NEXT: addq %r8, %rdi
 ; AVX2-NEXT: adcq %r9, %rsi
-; AVX2-NEXT: setns %al
-; AVX2-NEXT: cmpb %al, %bl
-; AVX2-NEXT: setne %al
-; AVX2-NEXT: andb %bpl, %al
+; AVX2-NEXT: seto %r8b
 ; AVX2-NEXT: addq {{[0-9]+}}(%rsp), %rdx
-; AVX2-NEXT: movq %rcx, %rbp
-; AVX2-NEXT: adcq %r10, %rbp
-; AVX2-NEXT: setns %bl
-; AVX2-NEXT: testq %rcx, %rcx
-; AVX2-NEXT: setns %cl
-; AVX2-NEXT: cmpb %bl, %cl
-; AVX2-NEXT: setne %r8b
-; AVX2-NEXT: testq %r10, %r10
-; AVX2-NEXT: setns %bl
-; AVX2-NEXT: cmpb %bl, %cl
-; AVX2-NEXT: sete %cl
-; AVX2-NEXT: andb %r8b, %cl
-; AVX2-NEXT: movzbl %cl, %ecx
-; AVX2-NEXT: negl %ecx
-; AVX2-NEXT: movzbl %al, %eax
+; AVX2-NEXT: adcq {{[0-9]+}}(%rsp), %rcx
+; AVX2-NEXT: seto %al
+; AVX2-NEXT: movzbl %al, %r9d
+; AVX2-NEXT: negl %r9d
+; AVX2-NEXT: movzbl %r8b, %eax
 ; AVX2-NEXT: negl %eax
 ; AVX2-NEXT: vmovd %eax, %xmm0
-; AVX2-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
-; AVX2-NEXT: movq %rdx, 16(%r11)
-; AVX2-NEXT: movq %rdi, (%r11)
-; AVX2-NEXT: movq %rbp, 24(%r11)
-; AVX2-NEXT: movq %rsi, 8(%r11)
-; AVX2-NEXT: popq %rbx
-; AVX2-NEXT: popq %rbp
+; AVX2-NEXT: vpinsrd $1, %r9d, %xmm0, %xmm0
+; AVX2-NEXT: movq %rdx, 16(%r10)
+; AVX2-NEXT: movq %rdi, (%r10)
+; AVX2-NEXT: movq %rcx, 24(%r10)
+; AVX2-NEXT: movq %rsi, 8(%r10)
 ; AVX2-NEXT: retq
 ;
 ; AVX512-LABEL: saddo_v2i128:
 ; AVX512: # %bb.0:
-; AVX512-NEXT: pushq %r14
-; AVX512-NEXT: pushq %rbx
 ; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10
-; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11
 ; AVX512-NEXT: addq {{[0-9]+}}(%rsp), %rdx
-; AVX512-NEXT: movq %rcx, %r14
-; AVX512-NEXT: adcq %r11, %r14
-; AVX512-NEXT: setns %bl
-; AVX512-NEXT: testq %rcx, %rcx
-; AVX512-NEXT: setns %cl
-; AVX512-NEXT: cmpb %bl, %cl
-; AVX512-NEXT: setne %bl
-; AVX512-NEXT: testq %r11, %r11
-; AVX512-NEXT: setns %al
-; AVX512-NEXT: cmpb %al, %cl
-; AVX512-NEXT: sete %al
-; AVX512-NEXT: andb %bl, %al
+; AVX512-NEXT: adcq {{[0-9]+}}(%rsp), %rcx
+; AVX512-NEXT: seto %al
 ; AVX512-NEXT: kmovd %eax, %k0
-; AVX512-NEXT: testq %r9, %r9
-; AVX512-NEXT: setns %al
-; AVX512-NEXT: testq %rsi, %rsi
-; AVX512-NEXT: setns %cl
-; AVX512-NEXT: cmpb %al, %cl
-; AVX512-NEXT: sete %al
 ; AVX512-NEXT: addq %r8, %rdi
 ; AVX512-NEXT: adcq %r9, %rsi
-; AVX512-NEXT: setns %bl
-; AVX512-NEXT: cmpb %bl, %cl
-; AVX512-NEXT: setne %cl
-; AVX512-NEXT: andb %al, %cl
-; AVX512-NEXT: andl $1, %ecx
-; AVX512-NEXT: kmovw %ecx, %k1
+; AVX512-NEXT: seto %al
+; AVX512-NEXT: andl $1, %eax
+; AVX512-NEXT: kmovw %eax, %k1
 ; AVX512-NEXT: kshiftlw $1, %k0, %k0
 ; AVX512-NEXT: korw %k0, %k1, %k1
 ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
 ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512-NEXT: movq %rdx, 16(%r10)
 ; AVX512-NEXT: movq %rdi, (%r10)
-; AVX512-NEXT: movq %r14, 24(%r10)
+; AVX512-NEXT: movq %rcx, 24(%r10)
 ; AVX512-NEXT: movq %rsi, 8(%r10)
-; AVX512-NEXT: popq %rbx
-; AVX512-NEXT: popq %r14
 ; AVX512-NEXT: retq
   %t = call {<2 x i128>, <2 x i1>} @llvm.sadd.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1)
   %val = extractvalue {<2 x i128>, <2 x i1>} %t, 0
diff --git a/llvm/test/CodeGen/X86/vec_ssubo.ll b/llvm/test/CodeGen/X86/vec_ssubo.ll
--- a/llvm/test/CodeGen/X86/vec_ssubo.ll
+++ b/llvm/test/CodeGen/X86/vec_ssubo.ll
@@ -1154,275 +1154,131 @@
 define <2 x i32> @ssubo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2) nounwind {
 ; SSE2-LABEL: ssubo_v2i128:
 ; SSE2: # %bb.0:
-; SSE2-NEXT: pushq %rbp
-; SSE2-NEXT: pushq %rbx
-; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %r11
 ; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %r10
-; SSE2-NEXT: testq %r9, %r9
-; SSE2-NEXT: setns %al
-; SSE2-NEXT: testq %rsi, %rsi
-; SSE2-NEXT: setns %bl
-; SSE2-NEXT: cmpb %al, %bl
-; SSE2-NEXT: setne %bpl
 ; SSE2-NEXT: subq %r8, %rdi
 ; SSE2-NEXT: sbbq %r9, %rsi
-; SSE2-NEXT: setns %al
-; SSE2-NEXT: cmpb %al, %bl
-; SSE2-NEXT: setne %al
-; SSE2-NEXT: andb %bpl, %al
+; SSE2-NEXT: seto %r8b
 ; SSE2-NEXT: subq {{[0-9]+}}(%rsp), %rdx
-; SSE2-NEXT: movq %rcx, %rbp
-; SSE2-NEXT: sbbq %r10, %rbp
-; SSE2-NEXT: setns %bl
-; SSE2-NEXT: testq %rcx, %rcx
-; SSE2-NEXT: setns %cl
-; SSE2-NEXT: cmpb %bl, %cl
-; SSE2-NEXT: setne %r8b
-; SSE2-NEXT: testq %r10, %r10
-; SSE2-NEXT: setns %bl
-; SSE2-NEXT: cmpb %bl, %cl
-; SSE2-NEXT: setne %cl
-; SSE2-NEXT: andb %r8b, %cl
-; SSE2-NEXT: movzbl %cl, %ecx
-; SSE2-NEXT: negl %ecx
-; SSE2-NEXT: movd %ecx, %xmm1
+; SSE2-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx
+; SSE2-NEXT: seto %al
 ; SSE2-NEXT: movzbl %al, %eax
 ; SSE2-NEXT: negl %eax
+; SSE2-NEXT: movd %eax, %xmm1
+; SSE2-NEXT: movzbl %r8b, %eax
+; SSE2-NEXT: negl %eax
 ; SSE2-NEXT: movd %eax, %xmm0
 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT: movq %rdx, 16(%r11)
-; SSE2-NEXT: movq %rdi, (%r11)
-; SSE2-NEXT: movq %rbp, 24(%r11)
-; SSE2-NEXT: movq %rsi, 8(%r11)
-; SSE2-NEXT: popq %rbx
-; SSE2-NEXT: popq %rbp
+; SSE2-NEXT: movq %rdx, 16(%r10)
+; SSE2-NEXT: movq %rdi, (%r10)
+; SSE2-NEXT: movq %rcx, 24(%r10)
+; SSE2-NEXT: movq %rsi, 8(%r10)
 ; SSE2-NEXT: retq
 ;
 ; SSSE3-LABEL: ssubo_v2i128:
 ; SSSE3: # %bb.0:
-; SSSE3-NEXT: pushq %rbp
-; SSSE3-NEXT: pushq %rbx
-; SSSE3-NEXT: movq {{[0-9]+}}(%rsp), %r11
 ; SSSE3-NEXT: movq {{[0-9]+}}(%rsp), %r10
-; SSSE3-NEXT: testq %r9, %r9
-; SSSE3-NEXT: setns %al
-; SSSE3-NEXT: testq %rsi, %rsi
-; SSSE3-NEXT: setns %bl
-; SSSE3-NEXT: cmpb %al, %bl
-; SSSE3-NEXT: setne %bpl
 ; SSSE3-NEXT: subq %r8, %rdi
 ; SSSE3-NEXT: sbbq %r9, %rsi
-; SSSE3-NEXT: setns %al
-; SSSE3-NEXT: cmpb %al, %bl
-; SSSE3-NEXT: setne %al
-; SSSE3-NEXT: andb %bpl, %al
+; SSSE3-NEXT: seto %r8b
 ; SSSE3-NEXT: subq {{[0-9]+}}(%rsp), %rdx
-; SSSE3-NEXT: movq %rcx, %rbp
-; SSSE3-NEXT: sbbq %r10, %rbp
-; SSSE3-NEXT: setns %bl
-; SSSE3-NEXT: testq %rcx, %rcx
-; SSSE3-NEXT: setns %cl
-; SSSE3-NEXT: cmpb %bl, %cl
-; SSSE3-NEXT: setne %r8b
-; SSSE3-NEXT: testq %r10, %r10
-; SSSE3-NEXT: setns %bl
-; SSSE3-NEXT: cmpb %bl, %cl
-; SSSE3-NEXT: setne %cl
-; SSSE3-NEXT: andb %r8b, %cl
-; SSSE3-NEXT: movzbl %cl, %ecx
-; SSSE3-NEXT: negl %ecx
-; SSSE3-NEXT: movd %ecx, %xmm1
+; SSSE3-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx
+; SSSE3-NEXT: seto %al
 ; SSSE3-NEXT: movzbl %al, %eax
 ; SSSE3-NEXT: negl %eax
+; SSSE3-NEXT: movd %eax, %xmm1
+; SSSE3-NEXT: movzbl %r8b, %eax
+; SSSE3-NEXT: negl %eax
 ; SSSE3-NEXT: movd %eax, %xmm0
 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSSE3-NEXT: movq %rdx, 16(%r11)
-; SSSE3-NEXT: movq %rdi, (%r11)
-; SSSE3-NEXT: movq %rbp, 24(%r11)
-; SSSE3-NEXT: movq %rsi, 8(%r11)
-; SSSE3-NEXT: popq %rbx
-; SSSE3-NEXT: popq %rbp
+; SSSE3-NEXT: movq %rdx, 16(%r10)
+; SSSE3-NEXT: movq %rdi, (%r10)
+; SSSE3-NEXT: movq %rcx, 24(%r10)
+; SSSE3-NEXT: movq %rsi, 8(%r10)
 ; SSSE3-NEXT: retq
 ;
 ; SSE41-LABEL: ssubo_v2i128:
 ; SSE41: # %bb.0:
-; SSE41-NEXT: pushq %rbp
-; SSE41-NEXT: pushq %rbx
-; SSE41-NEXT: movq {{[0-9]+}}(%rsp), %r11
 ; SSE41-NEXT: movq {{[0-9]+}}(%rsp), %r10
-; SSE41-NEXT: testq %r9, %r9
-; SSE41-NEXT: setns %al
-; SSE41-NEXT: testq %rsi, %rsi
-; SSE41-NEXT: setns %bl
-; SSE41-NEXT: cmpb %al, %bl
-; SSE41-NEXT: setne %bpl
 ; SSE41-NEXT: subq %r8, %rdi
 ; SSE41-NEXT: sbbq %r9, %rsi
-; SSE41-NEXT: setns %al
-; SSE41-NEXT: cmpb %al, %bl
-; SSE41-NEXT: setne %al
-; SSE41-NEXT: andb %bpl, %al
+; SSE41-NEXT: seto %r8b
 ; SSE41-NEXT: subq {{[0-9]+}}(%rsp), %rdx
-; SSE41-NEXT: movq %rcx, %rbp
-; SSE41-NEXT: sbbq %r10, %rbp
-; SSE41-NEXT: setns %bl
-; SSE41-NEXT: testq %rcx, %rcx
-; SSE41-NEXT: setns %cl
-; SSE41-NEXT: cmpb %bl, %cl
-; SSE41-NEXT: setne %r8b
-; SSE41-NEXT: testq %r10, %r10
-; SSE41-NEXT: setns %bl
-; SSE41-NEXT: cmpb %bl, %cl
-; SSE41-NEXT: setne %cl
-; SSE41-NEXT: andb %r8b, %cl
-; SSE41-NEXT: movzbl %cl, %ecx
-; SSE41-NEXT: negl %ecx
-; SSE41-NEXT: movzbl %al, %eax
+; SSE41-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx
+; SSE41-NEXT: seto %al
+; SSE41-NEXT: movzbl %al, %r9d
+; SSE41-NEXT: negl %r9d
+; SSE41-NEXT: movzbl %r8b, %eax
 ; SSE41-NEXT: negl %eax
 ; SSE41-NEXT: movd %eax, %xmm0
-; SSE41-NEXT: pinsrd $1, %ecx, %xmm0
-; SSE41-NEXT: movq %rdx, 16(%r11)
-; SSE41-NEXT: movq %rdi, (%r11)
-; SSE41-NEXT: movq %rbp, 24(%r11)
-; SSE41-NEXT: movq %rsi, 8(%r11)
-; SSE41-NEXT: popq %rbx
-; SSE41-NEXT: popq %rbp
+; SSE41-NEXT: pinsrd $1, %r9d, %xmm0
+; SSE41-NEXT: movq %rdx, 16(%r10)
+; SSE41-NEXT: movq %rdi, (%r10)
+; SSE41-NEXT: movq %rcx, 24(%r10)
+; SSE41-NEXT: movq %rsi, 8(%r10)
 ; SSE41-NEXT: retq
 ;
 ; AVX1-LABEL: ssubo_v2i128:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: pushq %rbp
-; AVX1-NEXT: pushq %rbx
-; AVX1-NEXT: movq {{[0-9]+}}(%rsp), %r11
 ; AVX1-NEXT: movq {{[0-9]+}}(%rsp), %r10
-; AVX1-NEXT: testq %r9, %r9
-; AVX1-NEXT: setns %al
-; AVX1-NEXT: testq %rsi, %rsi
-; AVX1-NEXT: setns %bl
-; AVX1-NEXT: cmpb %al, %bl
-; AVX1-NEXT: setne %bpl
 ; AVX1-NEXT: subq %r8, %rdi
 ; AVX1-NEXT: sbbq %r9, %rsi
-; AVX1-NEXT: setns %al
-; AVX1-NEXT: cmpb %al, %bl
-; AVX1-NEXT: setne %al
-; AVX1-NEXT: andb %bpl, %al
+; AVX1-NEXT: seto %r8b
 ; AVX1-NEXT: subq {{[0-9]+}}(%rsp), %rdx
-; AVX1-NEXT: movq %rcx, %rbp
-; AVX1-NEXT: sbbq %r10, %rbp
-; AVX1-NEXT: setns %bl
-; AVX1-NEXT: testq %rcx, %rcx
-; AVX1-NEXT: setns %cl
-; AVX1-NEXT: cmpb %bl, %cl
-; AVX1-NEXT: setne %r8b
-; AVX1-NEXT: testq %r10, %r10
-; AVX1-NEXT: setns %bl
-; AVX1-NEXT: cmpb %bl, %cl
-; AVX1-NEXT: setne %cl
-; AVX1-NEXT: andb %r8b, %cl
-; AVX1-NEXT: movzbl %cl, %ecx
-; AVX1-NEXT: negl %ecx
-; AVX1-NEXT: movzbl %al, %eax
+; AVX1-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx
+; AVX1-NEXT: seto %al
+; AVX1-NEXT: movzbl %al, %r9d
+; AVX1-NEXT: negl %r9d
+; AVX1-NEXT: movzbl %r8b, %eax
 ; AVX1-NEXT: negl %eax
 ; AVX1-NEXT: vmovd %eax, %xmm0
-; AVX1-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
-; AVX1-NEXT: movq %rdx, 16(%r11)
-; AVX1-NEXT: movq %rdi, (%r11)
-; AVX1-NEXT: movq %rbp, 24(%r11)
-; AVX1-NEXT: movq %rsi, 8(%r11)
-; AVX1-NEXT: popq %rbx
-; AVX1-NEXT: popq %rbp
+; AVX1-NEXT: vpinsrd $1, %r9d, %xmm0, %xmm0
+; AVX1-NEXT: movq %rdx, 16(%r10)
+; AVX1-NEXT: movq %rdi, (%r10)
+; AVX1-NEXT: movq %rcx, 24(%r10)
+; AVX1-NEXT: movq %rsi, 8(%r10)
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: ssubo_v2i128:
 ; AVX2: # %bb.0:
-; AVX2-NEXT: pushq %rbp
-; AVX2-NEXT: pushq %rbx
-; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r11
 ; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r10
-; AVX2-NEXT: testq %r9, %r9
-; AVX2-NEXT: setns %al
-; AVX2-NEXT: testq %rsi, %rsi
-; AVX2-NEXT: setns %bl
-; AVX2-NEXT: cmpb %al, %bl
-; AVX2-NEXT: setne %bpl
 ; AVX2-NEXT: subq %r8, %rdi
 ; AVX2-NEXT: sbbq %r9, %rsi
-; AVX2-NEXT: setns %al
-; AVX2-NEXT: cmpb %al, %bl
-; AVX2-NEXT: setne %al
-; AVX2-NEXT: andb %bpl, %al
+; AVX2-NEXT: seto %r8b
 ; AVX2-NEXT: subq {{[0-9]+}}(%rsp), %rdx
-; AVX2-NEXT: movq %rcx, %rbp
-; AVX2-NEXT: sbbq %r10, %rbp
-; AVX2-NEXT: setns %bl
-; AVX2-NEXT: testq %rcx, %rcx
-; AVX2-NEXT: setns %cl
-; AVX2-NEXT: cmpb %bl, %cl
-; AVX2-NEXT: setne %r8b
-; AVX2-NEXT: testq %r10, %r10
-; AVX2-NEXT: setns %bl
-; AVX2-NEXT: cmpb %bl, %cl
-; AVX2-NEXT: setne %cl
-; AVX2-NEXT: andb %r8b, %cl
-; AVX2-NEXT: movzbl %cl, %ecx
-; AVX2-NEXT: negl %ecx
-; AVX2-NEXT: movzbl %al, %eax
+; AVX2-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx
+; AVX2-NEXT: seto %al
+; AVX2-NEXT: movzbl %al, %r9d
+; AVX2-NEXT: negl %r9d
+; AVX2-NEXT: movzbl %r8b, %eax
 ; AVX2-NEXT: negl %eax
 ; AVX2-NEXT: vmovd %eax, %xmm0
-; AVX2-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
-; AVX2-NEXT: movq %rdx, 16(%r11)
-; AVX2-NEXT: movq %rdi, (%r11)
-; AVX2-NEXT: movq %rbp, 24(%r11)
-; AVX2-NEXT: movq %rsi, 8(%r11)
-; AVX2-NEXT: popq %rbx
-; AVX2-NEXT: popq %rbp
+; AVX2-NEXT: vpinsrd $1, %r9d, %xmm0, %xmm0
+; AVX2-NEXT: movq %rdx, 16(%r10)
+; AVX2-NEXT: movq %rdi, (%r10)
+; AVX2-NEXT: movq %rcx, 24(%r10)
+; AVX2-NEXT: movq %rsi, 8(%r10)
 ; AVX2-NEXT: retq
 ;
 ; AVX512-LABEL: ssubo_v2i128:
 ; AVX512: # %bb.0:
-; AVX512-NEXT: pushq %r14
-; AVX512-NEXT: pushq %rbx
 ; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10
-; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11
 ; AVX512-NEXT: subq {{[0-9]+}}(%rsp), %rdx
-; AVX512-NEXT: movq %rcx, %r14
-; AVX512-NEXT: sbbq %r11, %r14
-; AVX512-NEXT: setns %bl
-; AVX512-NEXT: testq %rcx, %rcx
-; AVX512-NEXT: setns %cl
-; AVX512-NEXT: cmpb %bl, %cl
-; AVX512-NEXT: setne %bl
-; AVX512-NEXT: testq %r11, %r11
-; AVX512-NEXT: setns %al
-; AVX512-NEXT: cmpb %al, %cl
-; AVX512-NEXT: setne %al
-; AVX512-NEXT: andb %bl, %al
+; AVX512-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx
+; AVX512-NEXT: seto %al
 ; AVX512-NEXT: kmovd %eax, %k0
-; AVX512-NEXT: testq %r9, %r9
-; AVX512-NEXT: setns %al
-; AVX512-NEXT: testq %rsi, %rsi
-; AVX512-NEXT: setns %cl
-; AVX512-NEXT: cmpb %al, %cl
-; AVX512-NEXT: setne %al
 ; AVX512-NEXT: subq %r8, %rdi
 ; AVX512-NEXT: sbbq %r9, %rsi
-; AVX512-NEXT: setns %bl
-; AVX512-NEXT: cmpb %bl, %cl
-; AVX512-NEXT: setne %cl
-; AVX512-NEXT: andb %al, %cl
-; AVX512-NEXT: andl $1, %ecx
-; AVX512-NEXT: kmovw %ecx, %k1
+; AVX512-NEXT: seto %al
+; AVX512-NEXT: andl $1, %eax
+; AVX512-NEXT: kmovw %eax, %k1
 ; AVX512-NEXT: kshiftlw $1, %k0, %k0
 ; AVX512-NEXT: korw %k0, %k1, %k1
 ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
 ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512-NEXT: movq %rdx, 16(%r10)
 ; AVX512-NEXT: movq %rdi, (%r10)
-; AVX512-NEXT: movq %r14, 24(%r10)
+; AVX512-NEXT: movq %rcx, 24(%r10)
 ; AVX512-NEXT: movq %rsi, 8(%r10)
-; AVX512-NEXT: popq %rbx
-; AVX512-NEXT: popq %r14
 ; AVX512-NEXT: retq
   %t = call {<2 x i128>, <2 x i1>} @llvm.ssub.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1)
   %val = extractvalue {<2 x i128>, <2 x i1>} %t, 0