Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1866,14 +1866,31 @@ if (isNullConstant(N1)) return N0; - // fold ((c1-A)+c2) -> (c1+c2)-A if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) { - if (N0.getOpcode() == ISD::SUB) - if (isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) { - return DAG.getNode(ISD::SUB, DL, VT, - DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)), - N0.getOperand(1)); + // fold ((c1-A)+c2) -> (c1+c2)-A + if (N0.getOpcode() == ISD::SUB && + isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) { + // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic. + return DAG.getNode(ISD::SUB, DL, VT, + DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)), + N0.getOperand(1)); + } + + // add (sext i1 X), 1 -> zext (not i1 X) + // We don't transform this pattern: + // add (zext i1 X), -1 -> sext (not i1 X) + // because most (?) targets generate better code for the zext form. + if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() && + isOneConstantOrOneSplatConstant(N1)) { + SDValue X = N0.getOperand(0); + if ((!LegalOperations || + (TLI.isOperationLegal(ISD::XOR, X.getValueType()) && + TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) && + X.getScalarValueSizeInBits() == 1) { + SDValue Not = DAG.getNOT(DL, X, X.getValueType()); + return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not); } + } } if (SDValue NewSel = foldBinOpIntoSelect(N)) Index: test/CodeGen/ARM/bool-ext-inc.ll =================================================================== --- test/CodeGen/ARM/bool-ext-inc.ll +++ test/CodeGen/ARM/bool-ext-inc.ll @@ -4,7 +4,7 @@ define i32 @sext_inc(i1 zeroext %x) { ; CHECK-LABEL: sext_inc: ; CHECK: @ BB#0: -; CHECK-NEXT: rsb r0, r0, #1 +; CHECK-NEXT: eor r0, r0, #1 ; CHECK-NEXT: mov pc, lr %ext = sext i1 %x to i32 %add = add i32 %ext, 1 @@ -14,14 +14,12 @@ define <4 x i32> @sext_inc_vec(<4 x i1> %x) { ; CHECK-LABEL: sext_inc_vec: ; CHECK: @ BB#0: -; CHECK-NEXT: vmov d16, r0, r1 -; CHECK-NEXT: vmov.i32 q9, #0x1f -; CHECK-NEXT: vmov.i32 q10, #0x1 +; CHECK-NEXT: vmov.i16 d16, #0x1 +; CHECK-NEXT: vmov d17, r0, r1 +; CHECK-NEXT: vmov.i32 q9, #0x1 +; CHECK-NEXT: veor d16, d17, d16 ; CHECK-NEXT: vmovl.u16 q8, d16 -; CHECK-NEXT: vneg.s32 q9, q9 -; CHECK-NEXT: vshl.i32 q8, q8, #31 -; CHECK-NEXT: vshl.s32 q8, q8, q9 -; CHECK-NEXT: vadd.i32 q8, q8, q10 +; CHECK-NEXT: vand q8, q8, q9 ; CHECK-NEXT: vmov r0, r1, d16 ; CHECK-NEXT: vmov r2, r3, d17 ; CHECK-NEXT: mov pc, lr @@ -35,11 +33,15 @@ ; CHECK: @ BB#0: ; CHECK-NEXT: mov r12, sp ; CHECK-NEXT: vmov d19, r2, r3 -; CHECK-NEXT: vmov.i32 q10, #0x1 ; CHECK-NEXT: vld1.64 {d16, d17}, [r12] ; CHECK-NEXT: vmov d18, r0, r1 ; CHECK-NEXT: vcgt.s32 q8, q9, q8 -; CHECK-NEXT: vadd.i32 q8, q8, q10 +; CHECK-NEXT: vmov.i16 d18, #0x1 +; CHECK-NEXT: vmovn.i32 d16, q8 +; CHECK-NEXT: veor d16, d16, d18 +; CHECK-NEXT: vmov.i32 q9, #0x1 +; CHECK-NEXT: vmovl.u16 q8, d16 +; CHECK-NEXT: vand q8, q8, q9 ; CHECK-NEXT: vmov r0, r1, d16 ; CHECK-NEXT: vmov r2, r3, d17 ; CHECK-NEXT: mov pc, lr @@ -57,9 +59,13 @@ ; CHECK-NEXT: vld1.64 {d16, d17}, [r12] ; CHECK-NEXT: vmov d18, r0, r1 ; CHECK-NEXT: vceq.i32 q8, q9, q8 -; CHECK-NEXT: vmov.i32 q9, #0x1 +; CHECK-NEXT: vmov.i16 d18, #0x1 ; CHECK-NEXT: vmvn q8, q8 -; CHECK-NEXT: vadd.i32 q8, q8, q9 +; CHECK-NEXT: vmovn.i32 d16, q8 +; CHECK-NEXT: veor d16, d16, d18 +; CHECK-NEXT: vmov.i32 q9, #0x1 +; CHECK-NEXT: vmovl.u16 q8, d16 +; CHECK-NEXT: vand q8, q8, q9 ; CHECK-NEXT: vmov r0, r1, d16 ; CHECK-NEXT: vmov r2, r3, d17 ; CHECK-NEXT: mov pc, lr Index: test/CodeGen/X86/bool-ext-inc.ll =================================================================== --- test/CodeGen/X86/bool-ext-inc.ll +++ test/CodeGen/X86/bool-ext-inc.ll @@ -6,9 +6,8 @@ define i32 @sext_inc(i1 zeroext %x) nounwind { ; CHECK-LABEL: sext_inc: ; CHECK: # BB#0: -; CHECK-NEXT: movzbl %dil, %ecx -; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: subl %ecx, %eax +; CHECK-NEXT: xorb $1, %dil +; CHECK-NEXT: movzbl %dil, %eax ; CHECK-NEXT: retq %ext = sext i1 %x to i32 %add = add i32 %ext, 1 @@ -20,9 +19,7 @@ define <4 x i32> @sext_inc_vec(<4 x i1> %x) nounwind { ; CHECK-LABEL: sext_inc_vec: ; CHECK: # BB#0: -; CHECK-NEXT: pslld $31, %xmm0 -; CHECK-NEXT: psrad $31, %xmm0 -; CHECK-NEXT: paddd {{.*}}(%rip), %xmm0 +; CHECK-NEXT: andnps {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq %ext = sext <4 x i1> %x to <4 x i32> %add = add <4 x i32> %ext, @@ -33,7 +30,7 @@ ; CHECK-LABEL: cmpgt_sext_inc_vec: ; CHECK: # BB#0: ; CHECK-NEXT: pcmpgtd %xmm1, %xmm0 -; CHECK-NEXT: paddd {{.*}}(%rip), %xmm0 +; CHECK-NEXT: pandn {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq %cmp = icmp sgt <4 x i32> %x, %y %ext = sext <4 x i1> %cmp to <4 x i32> @@ -45,9 +42,7 @@ ; CHECK-LABEL: cmpne_sext_inc_vec: ; CHECK: # BB#0: ; CHECK-NEXT: pcmpeqd %xmm1, %xmm0 -; CHECK-NEXT: pcmpeqd %xmm1, %xmm1 -; CHECK-NEXT: pxor %xmm1, %xmm0 -; CHECK-NEXT: paddd {{.*}}(%rip), %xmm0 +; CHECK-NEXT: psrld $31, %xmm0 ; CHECK-NEXT: retq %cmp = icmp ne <4 x i32> %x, %y %ext = sext <4 x i1> %cmp to <4 x i32>