Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2312,6 +2312,13 @@
     }
   }
 
+  // (x - y) + -1 -> add (xor y, -1), x
+  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+      isAllOnesOrAllOnesSplat(N1)) {
+    SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
+    return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
+  }
+
   if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
     return Combined;
 
@@ -2964,6 +2971,13 @@
   if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
     return V;
 
+  // (x - y) - 1 -> add (xor y, -1), x
+  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
+    SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
+                              DAG.getAllOnesConstant(DL, VT));
+    return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
+  }
+
   // Hoist one-use addition by constant: (x + C) - y -> (x - y) + C
   if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
       isConstantOrConstantVector(N0.getOperand(1))) {
Index: llvm/trunk/test/CodeGen/AArch64/xor.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/xor.ll
+++ llvm/trunk/test/CodeGen/AArch64/xor.ll
@@ -18,8 +18,8 @@
 define i32 @add_of_not(i32 %x, i32 %y) {
 ; CHECK-LABEL: add_of_not:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub w8, w0, w1
-; CHECK-NEXT:    sub w0, w8, #1 // =1
+; CHECK-NEXT:    mvn w8, w1
+; CHECK-NEXT:    add w0, w8, w0
 ; CHECK-NEXT:    ret
   %t0 = sub i32 %x, %y
   %r = add i32 %t0, -1
@@ -29,8 +29,8 @@
 define i32 @add_of_not_decrement(i32 %x, i32 %y) {
 ; CHECK-LABEL: add_of_not_decrement:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub w8, w0, w1
-; CHECK-NEXT:    sub w0, w8, #1 // =1
+; CHECK-NEXT:    mvn w8, w1
+; CHECK-NEXT:    add w0, w8, w0
 ; CHECK-NEXT:    ret
   %t0 = sub i32 %x, %y
   %r = sub i32 %t0, 1
@@ -40,9 +40,8 @@
 define <4 x i32> @vec_add_of_not(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: vec_add_of_not:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    movi v1.2d, #0xffffffffffffffff
-; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    mvn v1.16b, v1.16b
+; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %x, %y
   %r = add <4 x i32> %t0, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -52,9 +51,8 @@
 define <4 x i32> @vec_add_of_not_decrement(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: vec_add_of_not_decrement:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    movi v1.4s, #1
-; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    mvn v1.16b, v1.16b
+; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %x, %y
   %r = sub <4 x i32> %t0, <i32 1, i32 1, i32 1, i32 1>
Index: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
@@ -9,17 +9,16 @@
 ; VARIANT0:       ; %bb.0: ; %entry
 ; VARIANT0-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
 ; VARIANT0-NEXT:    s_load_dword s2, s[0:1], 0xb
+; VARIANT0-NEXT:    v_not_b32_e32 v3, v0
 ; VARIANT0-NEXT:    s_mov_b32 s7, 0xf000
 ; VARIANT0-NEXT:    s_mov_b32 s6, 0
 ; VARIANT0-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; VARIANT0-NEXT:    v_mov_b32_e32 v2, 0
 ; VARIANT0-NEXT:    s_waitcnt lgkmcnt(0)
 ; VARIANT0-NEXT:    buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
-; VARIANT0-NEXT:    s_waitcnt expcnt(0)
-; VARIANT0-NEXT:    v_sub_i32_e32 v0, vcc, s2, v0
-; VARIANT0-NEXT:    s_waitcnt vmcnt(0)
+; VARIANT0-NEXT:    v_add_i32_e32 v3, vcc, s2, v3
+; VARIANT0-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
 ; VARIANT0-NEXT:    s_barrier
-; VARIANT0-NEXT:    v_add_i32_e32 v3, vcc, -1, v0
 ; VARIANT0-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; VARIANT0-NEXT:    v_lshl_b64 v[3:4], v[3:4], 2
 ; VARIANT0-NEXT:    buffer_load_dword v0, v[3:4], s[4:7], 0 addr64
@@ -31,18 +30,18 @@
 ; VARIANT1:       ; %bb.0: ; %entry
 ; VARIANT1-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
 ; VARIANT1-NEXT:    s_load_dword s2, s[0:1], 0xb
+; VARIANT1-NEXT:    v_not_b32_e32 v3, v0
 ; VARIANT1-NEXT:    s_mov_b32 s7, 0xf000
 ; VARIANT1-NEXT:    s_mov_b32 s6, 0
 ; VARIANT1-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; VARIANT1-NEXT:    v_mov_b32_e32 v2, 0
 ; VARIANT1-NEXT:    s_waitcnt lgkmcnt(0)
 ; VARIANT1-NEXT:    buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
-; VARIANT1-NEXT:    s_waitcnt expcnt(0)
-; VARIANT1-NEXT:    v_sub_i32_e32 v0, vcc, s2, v0
+; VARIANT1-NEXT:    v_add_i32_e32 v3, vcc, s2, v3
 ; VARIANT1-NEXT:    s_barrier
-; VARIANT1-NEXT:    v_add_i32_e32 v3, vcc, -1, v0
 ; VARIANT1-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; VARIANT1-NEXT:    v_lshl_b64 v[3:4], v[3:4], 2
+; VARIANT1-NEXT:    s_waitcnt expcnt(0)
 ; VARIANT1-NEXT:    buffer_load_dword v0, v[3:4], s[4:7], 0 addr64
 ; VARIANT1-NEXT:    s_waitcnt vmcnt(0)
 ; VARIANT1-NEXT:    buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
@@ -60,8 +59,7 @@
 ; VARIANT2-NEXT:    global_store_dword v[1:2], v0, off
 ; VARIANT2-NEXT:    s_waitcnt vmcnt(0)
 ; VARIANT2-NEXT:    s_barrier
-; VARIANT2-NEXT:    v_sub_u32_e32 v0, s0, v0
-; VARIANT2-NEXT:    v_add_u32_e32 v3, -1, v0
+; VARIANT2-NEXT:    v_xad_u32 v3, v0, -1, s0
 ; VARIANT2-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; VARIANT2-NEXT:    v_lshlrev_b64 v[3:4], 2, v[3:4]
 ; VARIANT2-NEXT:    v_mov_b32_e32 v0, s3
@@ -83,8 +81,7 @@
 ; VARIANT3-NEXT:    v_addc_co_u32_e32 v2, vcc, 0, v2, vcc
 ; VARIANT3-NEXT:    global_store_dword v[1:2], v0, off
 ; VARIANT3-NEXT:    s_barrier
-; VARIANT3-NEXT:    v_sub_u32_e32 v0, s0, v0
-; VARIANT3-NEXT:    v_add_u32_e32 v3, -1, v0
+; VARIANT3-NEXT:    v_xad_u32 v3, v0, -1, s0
 ; VARIANT3-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; VARIANT3-NEXT:    v_lshlrev_b64 v[3:4], 2, v[3:4]
 ; VARIANT3-NEXT:    v_mov_b32_e32 v0, s3
Index: llvm/trunk/test/CodeGen/X86/xor.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/xor.ll
+++ llvm/trunk/test/CodeGen/X86/xor.ll
@@ -532,22 +532,24 @@
 ; X32-LABEL: add_of_not:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    decl %eax
+; X32-NEXT:    notl %eax
+; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LIN-LABEL: add_of_not:
 ; X64-LIN:       # %bb.0:
+; X64-LIN-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-LIN-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-LIN-NEXT:    subl %esi, %edi
-; X64-LIN-NEXT:    leal -1(%rdi), %eax
+; X64-LIN-NEXT:    notl %esi
+; X64-LIN-NEXT:    leal (%rsi,%rdi), %eax
 ; X64-LIN-NEXT:    retq
 ;
 ; X64-WIN-LABEL: add_of_not:
 ; X64-WIN:       # %bb.0:
+; X64-WIN-NEXT:    # kill: def $edx killed $edx def $rdx
 ; X64-WIN-NEXT:    # kill: def $ecx killed $ecx def $rcx
-; X64-WIN-NEXT:    subl %edx, %ecx
-; X64-WIN-NEXT:    leal -1(%rcx), %eax
+; X64-WIN-NEXT:    notl %edx
+; X64-WIN-NEXT:    leal (%rdx,%rcx), %eax
 ; X64-WIN-NEXT:    retq
   %t0 = sub i32 %x, %y
   %r = add i32 %t0, -1
@@ -558,22 +560,24 @@
 ; X32-LABEL: add_of_not_decrement:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    decl %eax
+; X32-NEXT:    notl %eax
+; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LIN-LABEL: add_of_not_decrement:
 ; X64-LIN:       # %bb.0:
+; X64-LIN-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-LIN-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-LIN-NEXT:    subl %esi, %edi
-; X64-LIN-NEXT:    leal -1(%rdi), %eax
+; X64-LIN-NEXT:    notl %esi
+; X64-LIN-NEXT:    leal (%rsi,%rdi), %eax
 ; X64-LIN-NEXT:    retq
 ;
 ; X64-WIN-LABEL: add_of_not_decrement:
 ; X64-WIN:       # %bb.0:
+; X64-WIN-NEXT:    # kill: def $edx killed $edx def $rdx
 ; X64-WIN-NEXT:    # kill: def $ecx killed $ecx def $rcx
-; X64-WIN-NEXT:    subl %edx, %ecx
-; X64-WIN-NEXT:    leal -1(%rcx), %eax
+; X64-WIN-NEXT:    notl %edx
+; X64-WIN-NEXT:    leal (%rdx,%rcx), %eax
 ; X64-WIN-NEXT:    retq
   %t0 = sub i32 %x, %y
   %r = sub i32 %t0, 1
@@ -583,24 +587,23 @@
 define <4 x i32> @vec_add_of_not(<4 x i32> %x, <4 x i32> %y) {
 ; X32-LABEL: vec_add_of_not:
 ; X32:       # %bb.0:
-; X32-NEXT:    psubd %xmm1, %xmm0
-; X32-NEXT:    pcmpeqd %xmm1, %xmm1
-; X32-NEXT:    paddd %xmm1, %xmm0
+; X32-NEXT:    pcmpeqd %xmm2, %xmm2
+; X32-NEXT:    pxor %xmm1, %xmm2
+; X32-NEXT:    paddd %xmm2, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LIN-LABEL: vec_add_of_not:
 ; X64-LIN:       # %bb.0:
-; X64-LIN-NEXT:    psubd %xmm1, %xmm0
-; X64-LIN-NEXT:    pcmpeqd %xmm1, %xmm1
-; X64-LIN-NEXT:    paddd %xmm1, %xmm0
+; X64-LIN-NEXT:    pcmpeqd %xmm2, %xmm2
+; X64-LIN-NEXT:    pxor %xmm1, %xmm2
+; X64-LIN-NEXT:    paddd %xmm2, %xmm0
 ; X64-LIN-NEXT:    retq
 ;
 ; X64-WIN-LABEL: vec_add_of_not:
 ; X64-WIN:       # %bb.0:
-; X64-WIN-NEXT:    movdqa (%rcx), %xmm1
-; X64-WIN-NEXT:    psubd (%rdx), %xmm1
 ; X64-WIN-NEXT:    pcmpeqd %xmm0, %xmm0
-; X64-WIN-NEXT:    paddd %xmm1, %xmm0
+; X64-WIN-NEXT:    pxor (%rdx), %xmm0
+; X64-WIN-NEXT:    paddd (%rcx), %xmm0
 ; X64-WIN-NEXT:    retq
   %t0 = sub <4 x i32> %x, %y
   %r = add <4 x i32> %t0, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -610,24 +613,23 @@
 define <4 x i32> @vec_add_of_not_decrement(<4 x i32> %x, <4 x i32> %y) {
 ; X32-LABEL: vec_add_of_not_decrement:
 ; X32:       # %bb.0:
-; X32-NEXT:    psubd %xmm1, %xmm0
-; X32-NEXT:    pcmpeqd %xmm1, %xmm1
-; X32-NEXT:    paddd %xmm1, %xmm0
+; X32-NEXT:    pcmpeqd %xmm2, %xmm2
+; X32-NEXT:    pxor %xmm1, %xmm2
+; X32-NEXT:    paddd %xmm2, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LIN-LABEL: vec_add_of_not_decrement:
 ; X64-LIN:       # %bb.0:
-; X64-LIN-NEXT:    psubd %xmm1, %xmm0
-; X64-LIN-NEXT:    pcmpeqd %xmm1, %xmm1
-; X64-LIN-NEXT:    paddd %xmm1, %xmm0
+; X64-LIN-NEXT:    pcmpeqd %xmm2, %xmm2
+; X64-LIN-NEXT:    pxor %xmm1, %xmm2
+; X64-LIN-NEXT:    paddd %xmm2, %xmm0
 ; X64-LIN-NEXT:    retq
 ;
 ; X64-WIN-LABEL: vec_add_of_not_decrement:
 ; X64-WIN:       # %bb.0:
-; X64-WIN-NEXT:    movdqa (%rcx), %xmm1
-; X64-WIN-NEXT:    psubd (%rdx), %xmm1
 ; X64-WIN-NEXT:    pcmpeqd %xmm0, %xmm0
-; X64-WIN-NEXT:    paddd %xmm1, %xmm0
+; X64-WIN-NEXT:    pxor (%rdx), %xmm0
+; X64-WIN-NEXT:    paddd (%rcx), %xmm0
 ; X64-WIN-NEXT:    retq
   %t0 = sub <4 x i32> %x, %y
   %r = sub <4 x i32> %t0, <i32 1, i32 1, i32 1, i32 1>
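
Why the two folds are sound: in two's complement, -y == ~y + 1, so
(x - y) - 1 == x + (~y + 1) - 1 == (xor y, -1) + x, and (x - y) + (-1) is the
same value. Rewriting this way makes the `not` of y independent of x, which is
what lets the updated checks fold it into mvn+add on AArch64, notl+leal on
x86, and v_not_b32/v_xad_u32 on AMDGPU. Below is a minimal standalone sanity
check of the identity, not part of the patch; the file and variable names are
illustrative only. Unsigned arithmetic is used so the wrap-around matches the
modular semantics of ISD::ADD/ISD::SUB (signed overflow would be UB in C++).

// identity_check.cpp - sketch verifying the identity behind the new folds.
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  // Edge-case samples, including the signed min/max boundaries and wrap.
  const uint32_t Samples[] = {0u,          1u,          2u,
                              0x7FFFFFFFu, 0x80000000u, 0xFFFFFFFEu,
                              0xFFFFFFFFu, 12345u};
  for (uint32_t X : Samples) {
    for (uint32_t Y : Samples) {
      // (x - y) + -1 -> add (xor y, -1), x
      assert((X - Y) + 0xFFFFFFFFu == (Y ^ 0xFFFFFFFFu) + X);
      // (x - y) - 1 -> add (xor y, -1), x  (same value, same replacement)
      assert((X - Y) - 1u == ~Y + X);
    }
  }
  std::puts("identity holds for all sampled pairs");
  return 0;
}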