diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -52270,7 +52270,8 @@
 /// Also try (ADD/SUB)+(AND(SRL,1)) bit extraction pattern with BT+{ADC, SBB}.
 static SDValue combineAddOrSubToADCOrSBB(bool IsSub, const SDLoc &DL, EVT VT,
                                          SDValue X, SDValue Y,
-                                         SelectionDAG &DAG) {
+                                         SelectionDAG &DAG,
+                                         bool ZeroSecondOpOnly = false) {
   if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
     return SDValue();
 
@@ -52294,7 +52295,7 @@
   // If X is -1 or 0, then we have an opportunity to avoid constants required in
   // the general case below.
   auto *ConstantX = dyn_cast<ConstantSDNode>(X);
-  if (ConstantX) {
+  if (ConstantX && !ZeroSecondOpOnly) {
     if ((!IsSub && CC == X86::COND_AE && ConstantX->isAllOnes()) ||
         (IsSub && CC == X86::COND_B && ConstantX->isZero())) {
       // This is a complicated way to get -1 or 0 from the carry flag:
@@ -52332,6 +52333,9 @@
                        DAG.getConstant(0, DL, VT), EFLAGS);
   }
 
+  if (ZeroSecondOpOnly)
+    return SDValue();
+
   if (CC == X86::COND_A) {
     // Try to convert COND_A into COND_B in an attempt to facilitate
     // materializing "setb reg".
@@ -52589,7 +52593,8 @@
   SDValue LHS = N->getOperand(0);
   SDValue RHS = N->getOperand(1);
   MVT VT = LHS.getSimpleValueType();
-  unsigned GenericOpc = X86ISD::ADD == N->getOpcode() ? ISD::ADD : ISD::SUB;
+  bool IsSub = X86ISD::SUB == N->getOpcode();
+  unsigned GenericOpc = IsSub ? ISD::SUB : ISD::ADD;
 
   // If we don't use the flag result, simplify back to a generic ADD/SUB.
   if (!N->hasAnyUseOfValue(1)) {
@@ -52611,7 +52616,10 @@
   MatchGeneric(LHS, RHS, false);
   MatchGeneric(RHS, LHS, X86ISD::SUB == N->getOpcode());
 
-  return SDValue();
+  // TODO: Can we drop the ZeroSecondOpOnly limit? This is to guarantee that the
+  // EFLAGS result doesn't change.
+  return combineAddOrSubToADCOrSBB(IsSub, DL, VT, LHS, RHS, DAG,
+                                   /*ZeroSecondOpOnly*/ true);
 }
 
 static SDValue combineSBB(SDNode *N, SelectionDAG &DAG) {
diff --git a/llvm/test/CodeGen/X86/add-sub-bool.ll b/llvm/test/CodeGen/X86/add-sub-bool.ll
--- a/llvm/test/CodeGen/X86/add-sub-bool.ll
+++ b/llvm/test/CodeGen/X86/add-sub-bool.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=X86
-; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=X64,NOTBM
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+tbm | FileCheck %s --check-prefixes=X64,TBM
+; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=X64
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+tbm | FileCheck %s --check-prefixes=X64
 
 ; PR35908 - Fold ADD/SUB and bit extracts into ADC/SBB+BT
 ;
@@ -104,55 +104,40 @@
 define i128 @test_i128_add_add_idx(i128 %x, i128 %y, i128 %z) nounwind {
 ; X86-LABEL: test_i128_add_add_idx:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT:    addl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    addl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT:    adcl {{[0-9]+}}(%esp), %edi
-; X86-NEXT:    adcl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    adcl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    shrl $5, %edx
-; X86-NEXT:    andl $1, %edx
-; X86-NEXT:    addl %ebx, %edx
-; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    btl $5, {{[0-9]+}}(%esp)
 ; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    adcl $0, %edx
 ; X86-NEXT:    adcl $0, %ecx
-; X86-NEXT:    movl %edx, (%eax)
 ; X86-NEXT:    movl %edi, 4(%eax)
-; X86-NEXT:    movl %esi, 8(%eax)
+; X86-NEXT:    movl %esi, (%eax)
+; X86-NEXT:    movl %edx, 8(%eax)
 ; X86-NEXT:    movl %ecx, 12(%eax)
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    popl %edi
-; X86-NEXT:    popl %ebx
 ; X86-NEXT:    retl $4
 ;
-; NOTBM-LABEL: test_i128_add_add_idx:
-; NOTBM:       # %bb.0:
-; NOTBM-NEXT:    movq %r9, %rax
-; NOTBM-NEXT:    addq %rdx, %rdi
-; NOTBM-NEXT:    adcq %rcx, %rsi
-; NOTBM-NEXT:    shrl $5, %eax
-; NOTBM-NEXT:    andl $1, %eax
-; NOTBM-NEXT:    addq %rdi, %rax
-; NOTBM-NEXT:    adcq $0, %rsi
-; NOTBM-NEXT:    movq %rsi, %rdx
-; NOTBM-NEXT:    retq
-;
-; TBM-LABEL: test_i128_add_add_idx:
-; TBM:       # %bb.0:
-; TBM-NEXT:    addq %rdx, %rdi
-; TBM-NEXT:    adcq %rcx, %rsi
-; TBM-NEXT:    bextrl $261, %r9d, %eax # imm = 0x105
-; TBM-NEXT:    addq %rdi, %rax
-; TBM-NEXT:    adcq $0, %rsi
-; TBM-NEXT:    movq %rsi, %rdx
-; TBM-NEXT:    retq
+; X64-LABEL: test_i128_add_add_idx:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    addq %rdx, %rax
+; X64-NEXT:    adcq %rcx, %rsi
+; X64-NEXT:    btl $5, %r9d
+; X64-NEXT:    adcq $0, %rax
+; X64-NEXT:    adcq $0, %rsi
+; X64-NEXT:    movq %rsi, %rdx
+; X64-NEXT:    retq
   %add = add i128 %y, %x
   %shift = lshr i128 %z, 69
   %mask = and i128 %shift, 1
diff --git a/llvm/test/CodeGen/X86/addcarry.ll b/llvm/test/CodeGen/X86/addcarry.ll
--- a/llvm/test/CodeGen/X86/addcarry.ll
+++ b/llvm/test/CodeGen/X86/addcarry.ll
@@ -713,14 +713,12 @@
 ; CHECK-LABEL: addcarry_mixed_2x64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq %rdi, %rax
-; CHECK-NEXT:    xorl %edi, %edi
-; CHECK-NEXT:    addq %rdx, %rax
-; CHECK-NEXT:    setb %dil
 ; CHECK-NEXT:    addq %rcx, %rsi
-; CHECK-NEXT:    setb %dl
-; CHECK-NEXT:    subq %rdi, %rsi
+; CHECK-NEXT:    setb %dil
+; CHECK-NEXT:    addq %rdx, %rax
+; CHECK-NEXT:    sbbq $0, %rsi
 ; CHECK-NEXT:    setb %cl
-; CHECK-NEXT:    orb %dl, %cl
+; CHECK-NEXT:    orb %dil, %cl
 ; CHECK-NEXT:    movq %rsi, %rdx
 ; CHECK-NEXT:    retq
   %t0 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %x0, i64 %y0)
diff --git a/llvm/test/CodeGen/X86/combine-sbb.ll b/llvm/test/CodeGen/X86/combine-sbb.ll
--- a/llvm/test/CodeGen/X86/combine-sbb.ll
+++ b/llvm/test/CodeGen/X86/combine-sbb.ll
@@ -58,33 +58,29 @@
 define void @PR25858_i64(%WideUInt64* sret(%WideUInt64), %WideUInt64*, %WideUInt64*) nounwind {
 ; X86-LABEL: PR25858_i64:
 ; X86:       # %bb.0: # %top
-; X86-NEXT:    pushl %ebp
 ; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT:    movl (%edi), %ecx
-; X86-NEXT:    movl 4(%edi), %edx
-; X86-NEXT:    subl (%esi), %ecx
-; X86-NEXT:    sbbl 4(%esi), %edx
-; X86-NEXT:    setb %bl
-; X86-NEXT:    movl 12(%edi), %ebp
+; X86-NEXT:    movl (%edi), %edx
+; X86-NEXT:    movl 4(%edi), %esi
+; X86-NEXT:    movl 12(%edi), %ecx
 ; X86-NEXT:    movl 8(%edi), %edi
-; X86-NEXT:    subl 8(%esi), %edi
-; X86-NEXT:    sbbl 12(%esi), %ebp
-; X86-NEXT:    movzbl %bl, %esi
-; X86-NEXT:    subl %esi, %edi
-; X86-NEXT:    sbbl $0, %ebp
-; X86-NEXT:    movl %ecx, (%eax)
-; X86-NEXT:    movl %edx, 4(%eax)
+; X86-NEXT:    subl 8(%ebx), %edi
+; X86-NEXT:    sbbl 12(%ebx), %ecx
+; X86-NEXT:    subl (%ebx), %edx
+; X86-NEXT:    sbbl 4(%ebx), %esi
+; X86-NEXT:    sbbl $0, %edi
+; X86-NEXT:    sbbl $0, %ecx
+; X86-NEXT:    movl %edx, (%eax)
+; X86-NEXT:    movl %esi, 4(%eax)
 ; X86-NEXT:    movl %edi, 8(%eax)
-; X86-NEXT:    movl %ebp, 12(%eax)
+; X86-NEXT:    movl %ecx, 12(%eax)
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    popl %edi
 ; X86-NEXT:    popl %ebx
-; X86-NEXT:    popl %ebp
 ; X86-NEXT:    retl $4
 ;
 ; X64-LABEL: PR25858_i64:
diff --git a/llvm/test/CodeGen/X86/subcarry.ll b/llvm/test/CodeGen/X86/subcarry.ll
--- a/llvm/test/CodeGen/X86/subcarry.ll
+++ b/llvm/test/CodeGen/X86/subcarry.ll
@@ -312,16 +312,15 @@
 ; CHECK-LABEL: subcarry_2x64_add_reversed:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq %rdi, %rax
-; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    movq %rsi, %rdi
+; CHECK-NEXT:    subq %rcx, %rdi
 ; CHECK-NEXT:    subq %rdx, %rax
-; CHECK-NEXT:    setb %dil
-; CHECK-NEXT:    movq %rsi, %rdx
-; CHECK-NEXT:    subq %rcx, %rdx
-; CHECK-NEXT:    subq %rdi, %rdx
-; CHECK-NEXT:    setb %dil
+; CHECK-NEXT:    sbbq $0, %rdi
+; CHECK-NEXT:    setb %r8b
 ; CHECK-NEXT:    cmpq %rcx, %rsi
-; CHECK-NEXT:    adcb $0, %dil
-; CHECK-NEXT:    movl %edi, %ecx
+; CHECK-NEXT:    adcb $0, %r8b
+; CHECK-NEXT:    movq %rdi, %rdx
+; CHECK-NEXT:    movl %r8d, %ecx
 ; CHECK-NEXT:    retq
   %t0 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %x0, i64 %y0)
   %s0 = extractvalue { i64, i1 } %t0, 0
@@ -601,22 +600,20 @@
 ; CHECK-LABEL: sub_U256_without_i128_or_recursive:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq %rdi, %rax
-; CHECK-NEXT:    movq (%rsi), %r9
-; CHECK-NEXT:    movq 8(%rsi), %r10
-; CHECK-NEXT:    subq (%rdx), %r9
-; CHECK-NEXT:    sbbq 8(%rdx), %r10
-; CHECK-NEXT:    setb %r8b
+; CHECK-NEXT:    movq (%rsi), %r8
+; CHECK-NEXT:    movq 8(%rsi), %r9
 ; CHECK-NEXT:    movq 16(%rsi), %rcx
 ; CHECK-NEXT:    movq 24(%rsi), %rsi
 ; CHECK-NEXT:    xorl %edi, %edi
 ; CHECK-NEXT:    subq 16(%rdx), %rcx
 ; CHECK-NEXT:    setb %dil
 ; CHECK-NEXT:    subq 24(%rdx), %rsi
-; CHECK-NEXT:    movzbl %r8b, %edx
-; CHECK-NEXT:    subq %rdx, %rcx
+; CHECK-NEXT:    subq (%rdx), %r8
+; CHECK-NEXT:    sbbq 8(%rdx), %r9
+; CHECK-NEXT:    sbbq $0, %rcx
 ; CHECK-NEXT:    sbbq %rdi, %rsi
-; CHECK-NEXT:    movq %r9, (%rax)
-; CHECK-NEXT:    movq %r10, 8(%rax)
+; CHECK-NEXT:    movq %r8, (%rax)
+; CHECK-NEXT:    movq %r9, 8(%rax)
 ; CHECK-NEXT:    movq %rcx, 16(%rax)
 ; CHECK-NEXT:    movq %rsi, 24(%rax)
 ; CHECK-NEXT:    retq
@@ -668,12 +665,10 @@
 define i1 @subcarry_ult_2x64(i64 %x0, i64 %x1, i64 %y0, i64 %y1) nounwind {
 ; CHECK-LABEL: subcarry_ult_2x64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    cmpq %rdx, %rdi
-; CHECK-NEXT:    setb %al
 ; CHECK-NEXT:    subq %rcx, %rsi
 ; CHECK-NEXT:    setb %cl
-; CHECK-NEXT:    cmpq %rax, %rsi
+; CHECK-NEXT:    cmpq %rdx, %rdi
+; CHECK-NEXT:    sbbq $0, %rsi
 ; CHECK-NEXT:    setb %al
 ; CHECK-NEXT:    orb %cl, %al
 ; CHECK-NEXT:    retq