Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1866,14 +1866,31 @@
   if (isNullConstant(N1))
     return N0;

-  // fold ((c1-A)+c2) -> (c1+c2)-A
   if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
-    if (N0.getOpcode() == ISD::SUB)
-      if (isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
-        return DAG.getNode(ISD::SUB, DL, VT,
-                           DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
-                           N0.getOperand(1));
+    // fold ((c1-A)+c2) -> (c1+c2)-A
+    if (N0.getOpcode() == ISD::SUB &&
+        isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
+      // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
+      return DAG.getNode(ISD::SUB, DL, VT,
+                         DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
+                         N0.getOperand(1));
+    }
+
+    // add (sext i1 X), 1 -> zext (not i1 X)
+    // We don't transform this pattern:
+    //   add (zext i1 X), -1 -> sext (not i1 X)
+    // because most (?) targets generate better code for the zext form.
+    if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
+        isOneConstantOrOneSplatConstant(N1)) {
+      SDValue X = N0.getOperand(0);
+      if ((!LegalOperations ||
+           (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
+            TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
+          X.getScalarValueSizeInBits() == 1) {
+        SDValue Not = DAG.getNOT(DL, X, X.getValueType());
+        return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
       }
+    }
   }

   if (SDValue NewSel = foldBinOpIntoSelect(N))
Index: llvm/trunk/test/CodeGen/ARM/bool-ext-inc.ll
===================================================================
--- llvm/trunk/test/CodeGen/ARM/bool-ext-inc.ll
+++ llvm/trunk/test/CodeGen/ARM/bool-ext-inc.ll
@@ -4,7 +4,7 @@
 define i32 @sext_inc(i1 zeroext %x) {
 ; CHECK-LABEL: sext_inc:
 ; CHECK:       @ BB#0:
-; CHECK-NEXT:    rsb r0, r0, #1
+; CHECK-NEXT:    eor r0, r0, #1
 ; CHECK-NEXT:    mov pc, lr
   %ext = sext i1 %x to i32
   %add = add i32 %ext, 1
@@ -14,14 +14,12 @@
 define <4 x i32> @sext_inc_vec(<4 x i1> %x) {
 ; CHECK-LABEL: sext_inc_vec:
 ; CHECK:       @ BB#0:
-; CHECK-NEXT:    vmov d16, r0, r1
-; CHECK-NEXT:    vmov.i32 q9, #0x1f
-; CHECK-NEXT:    vmov.i32 q10, #0x1
+; CHECK-NEXT:    vmov.i16 d16, #0x1
+; CHECK-NEXT:    vmov d17, r0, r1
+; CHECK-NEXT:    vmov.i32 q9, #0x1
+; CHECK-NEXT:    veor d16, d17, d16
 ; CHECK-NEXT:    vmovl.u16 q8, d16
-; CHECK-NEXT:    vneg.s32 q9, q9
-; CHECK-NEXT:    vshl.i32 q8, q8, #31
-; CHECK-NEXT:    vshl.s32 q8, q8, q9
-; CHECK-NEXT:    vadd.i32 q8, q8, q10
+; CHECK-NEXT:    vand q8, q8, q9
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
 ; CHECK-NEXT:    mov pc, lr
@@ -38,8 +36,8 @@
 ; CHECK-NEXT:    vmov.i32 q10, #0x1
 ; CHECK-NEXT:    vld1.64 {d16, d17}, [r12]
 ; CHECK-NEXT:    vmov d18, r0, r1
-; CHECK-NEXT:    vcgt.s32 q8, q9, q8
-; CHECK-NEXT:    vadd.i32 q8, q8, q10
+; CHECK-NEXT:    vcge.s32 q8, q8, q9
+; CHECK-NEXT:    vand q8, q8, q10
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
 ; CHECK-NEXT:    mov pc, lr
@@ -54,12 +52,11 @@
 ; CHECK:       @ BB#0:
 ; CHECK-NEXT:    mov r12, sp
 ; CHECK-NEXT:    vmov d19, r2, r3
+; CHECK-NEXT:    vmov.i32 q10, #0x1
 ; CHECK-NEXT:    vld1.64 {d16, d17}, [r12]
 ; CHECK-NEXT:    vmov d18, r0, r1
 ; CHECK-NEXT:    vceq.i32 q8, q9, q8
-; CHECK-NEXT:    vmov.i32 q9, #0x1
-; CHECK-NEXT:    vmvn q8, q8
-; CHECK-NEXT:    vadd.i32 q8, q8, q9
+; CHECK-NEXT:    vand q8, q8, q10
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
 ; CHECK-NEXT:    mov pc, lr
Index: llvm/trunk/test/CodeGen/X86/bool-ext-inc.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/bool-ext-inc.ll
+++ llvm/trunk/test/CodeGen/X86/bool-ext-inc.ll
@@ -1,29 +1,26 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s

-; FIXME: add (sext i1 X), 1 -> zext (not i1 X)
+; add (sext i1 X), 1 -> zext (not i1 X)

 define i32 @sext_inc(i1 zeroext %x) nounwind {
 ; CHECK-LABEL: sext_inc:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    movzbl %dil, %ecx
-; CHECK-NEXT:    movl $1, %eax
-; CHECK-NEXT:    subl %ecx, %eax
+; CHECK-NEXT:    xorb $1, %dil
+; CHECK-NEXT:    movzbl %dil, %eax
 ; CHECK-NEXT:    retq
   %ext = sext i1 %x to i32
   %add = add i32 %ext, 1
   ret i32 %add
 }

-; FIXME: add (sext i1 X), 1 -> zext (not i1 X)
+; add (sext i1 X), 1 -> zext (not i1 X)

 define <4 x i32> @sext_inc_vec(<4 x i1> %x) nounwind {
 ; CHECK-LABEL: sext_inc_vec:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    vpslld $31, %xmm0, %xmm0
-; CHECK-NEXT:    vpsrad $31, %xmm0, %xmm0
-; CHECK-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm1
-; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vbroadcastss {{.*}}(%rip), %xmm1
+; CHECK-NEXT:    vandnps %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %ext = sext <4 x i1> %x to <4 x i32>
   %add = add <4 x i32> %ext, <i32 1, i32 1, i32 1, i32 1>
@@ -35,7 +32,7 @@
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm1
-; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpandn %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %cmp = icmp sgt <4 x i32> %x, %y
   %ext = sext <4 x i1> %cmp to <4 x i32>
@@ -47,10 +44,7 @@
 ; CHECK-LABEL: cmpne_sext_inc_vec:
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; CHECK-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm1
-; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpsrld $31, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %cmp = icmp ne <4 x i32> %x, %y
   %ext = sext <4 x i1> %cmp to <4 x i32>
@@ -63,7 +57,7 @@
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
 ; CHECK-NEXT:    vpbroadcastq {{.*}}(%rip), %ymm1
-; CHECK-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpandn %ymm1, %ymm0, %ymm0
 ; CHECK-NEXT:    retq
   %cmp = icmp sgt <4 x i64> %x, %y
   %ext = sext <4 x i1> %cmp to <4 x i64>
@@ -75,13 +69,11 @@
 ; CHECK-LABEL: bool_logic_and_math:
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    cmpl %esi, %edi
-; CHECK-NEXT:    setne %al
+; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    cmpl %ecx, %edx
-; CHECK-NEXT:    setne %cl
-; CHECK-NEXT:    andb %al, %cl
-; CHECK-NEXT:    movzbl %cl, %ecx
-; CHECK-NEXT:    movl $1, %eax
-; CHECK-NEXT:    subl %ecx, %eax
+; CHECK-NEXT:    sete %cl
+; CHECK-NEXT:    orb %al, %cl
+; CHECK-NEXT:    movzbl %cl, %eax
 ; CHECK-NEXT:    retq
   %cmp1 = icmp ne i32 %a, %b
   %cmp2 = icmp ne i32 %c, %d
@@ -95,12 +87,12 @@
 ; CHECK-LABEL: bool_logic_and_math_vec:
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; CHECK-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
-; CHECK-NEXT:    vpxor %xmm1, %xmm2, %xmm1
+; CHECK-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm1
+; CHECK-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
+; CHECK-NEXT:    vpxor %xmm2, %xmm1, %xmm1
 ; CHECK-NEXT:    vpandn %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm1
-; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpandn %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %cmp1 = icmp ne <4 x i32> %a, %b
   %cmp2 = icmp ne <4 x i32> %c, %d
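
An aside on why the fold is sound: for a 1-bit value X, sign-extension yields 0 or -1, so (sext X) + 1 collapses to 1 - X, which is exactly zext (not X), i.e. zext (X xor 1). The xorb/eor instructions in the updated CHECK lines are the scalar instances of that rewrite. Below is a minimal, self-contained C sketch of the arithmetic identity only; it is illustrative commentary, not part of the patch, and uses no LLVM APIs.

#include <assert.h>
#include <stdint.h>

/* For a 1-bit value x in {0, 1}:
   sext(x) is 0 or -1, so sext(x) + 1 == 1 - x == zext(x ^ 1) == zext(!x). */
int main(void) {
  for (uint32_t x = 0; x <= 1; ++x) {
    int32_t sext = x ? -1 : 0;                 /* sign-extend i1 to i32 */
    uint32_t zext_not = x ^ 1u;                /* zero-extend (not i1) to i32 */
    assert((uint32_t)(sext + 1) == zext_not);  /* the two forms agree */
  }
  return 0;
}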