Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1949,6 +1949,49 @@
   return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
 }
 
+/// Fold an add/sub of a constant and a zero-extended "low bit is clear" test:
+///   add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
+///   sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
+/// Returns an empty SDValue when \p N does not match either pattern.
+static SDValue foldAddSubOfMaskedBool(SDNode *N, SelectionDAG &DAG) {
+  // The disjunction must be parenthesized: '&&' binds tighter than '||', so
+  // without the parentheses the message would only attach to the SUB check.
+  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
+         "Expecting add or sub");
+
+  // Match a constant operand and a zext operand for the math instruction:
+  // add Z, C
+  // sub C, Z
+  bool IsAdd = N->getOpcode() == ISD::ADD;
+  SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
+  SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
+  auto *CN = dyn_cast<ConstantSDNode>(C);
+  if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
+    return SDValue();
+
+  // Match the zext operand as a setcc of a boolean.
+  if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
+      Z.getOperand(0).getValueType() != MVT::i1)
+    return SDValue();
+
+  // Match the compare as: setcc (X & 1), 0, eq.
+  SDValue SetCC = Z.getOperand(0);
+  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
+  if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
+      SetCC.getOperand(0).getOpcode() != ISD::AND ||
+      !isOneConstant(SetCC.getOperand(0).getOperand(1)))
+    return SDValue();
+
+  // We are adding/subtracting an inverted low bit from a constant. Turn that
+  // into a subtract/add of the low bit with incremented/decremented constant:
+  // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
+  // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
+  EVT VT = C.getValueType();
+  SDLoc DL(N);
+  SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
+  SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
+                       DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
+  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
+}
+
 SDValue DAGCombiner::visitADD(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -2080,6 +2123,9 @@
                          DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
   }
 
+  if (SDValue V = foldAddSubOfMaskedBool(N, DAG))
+    return V;
+
   if (SimplifyDemandedBits(SDValue(N, 0)))
     return SDValue(N, 0);
 
@@ -2579,6 +2625,9 @@
   if (N1.isUndef())
     return N1;
 
+  if (SDValue V = foldAddSubOfMaskedBool(N, DAG))
+    return V;
+
   // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
     if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
Index: test/CodeGen/PowerPC/bool-math.ll
===================================================================
--- test/CodeGen/PowerPC/bool-math.ll
+++ test/CodeGen/PowerPC/bool-math.ll
@@ -4,9 +4,9 @@
 define i32 @sub_zext_cmp_mask_same_size_result(i32 %x) {
 ; CHECK-LABEL: sub_zext_cmp_mask_same_size_result:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    nor 3, 3, 3
-; CHECK-NEXT:    clrlwi 3, 3, 31
-; CHECK-NEXT:    subfic 3, 3, -27
+; CHECK-NEXT:    clrldi 3, 3, 63
+; CHECK-NEXT:    ori 3, 3, 65508
+; CHECK-NEXT:    oris 3, 3, 65535
 ; CHECK-NEXT:    blr
   %a = and i32 %x, 1
   %c = icmp eq i32 %a, 0
@@ -18,9 +18,8 @@
 define i32 @sub_zext_cmp_mask_wider_result(i8 %x) {
 ; CHECK-LABEL: sub_zext_cmp_mask_wider_result:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    nor 3, 3, 3
-; CHECK-NEXT:    clrlwi 3, 3, 31
-; CHECK-NEXT:    subfic 3, 3, 27
+; CHECK-NEXT:    clrldi 3, 3, 63
+; CHECK-NEXT:    ori 3, 3, 26
 ; CHECK-NEXT:    blr
   %a = and i8 %x, 1
   %c = icmp eq i8 %a, 0
@@ -32,9 +31,8 @@
 define i8 @sub_zext_cmp_mask_narrower_result(i32 %x) {
 ; CHECK-LABEL: sub_zext_cmp_mask_narrower_result:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    nor 3, 3, 3
-; CHECK-NEXT:    clrlwi 3, 3, 31
-; CHECK-NEXT:    subfic 3, 3, 47
+; CHECK-NEXT:    clrldi 3, 3, 63
+; CHECK-NEXT:    ori 3, 3, 46
 ; CHECK-NEXT:    blr
   %a = and i32 %x, 1
   %c = icmp eq i32 %a, 0
@@ -46,10 +44,8 @@
 define i8 @add_zext_cmp_mask_same_size_result(i8 %x) {
 ; CHECK-LABEL:
add_zext_cmp_mask_same_size_result: ; CHECK: # %bb.0: -; CHECK-NEXT: xori 3, 3, 65535 -; CHECK-NEXT: xoris 3, 3, 65535 -; CHECK-NEXT: clrldi 3, 3, 63 -; CHECK-NEXT: ori 3, 3, 26 +; CHECK-NEXT: rlwinm 3, 3, 0, 31, 31 +; CHECK-NEXT: subfic 3, 3, 27 ; CHECK-NEXT: blr %a = and i8 %x, 1 %c = icmp eq i8 %a, 0 @@ -61,10 +57,8 @@ define i32 @add_zext_cmp_mask_wider_result(i8 %x) { ; CHECK-LABEL: add_zext_cmp_mask_wider_result: ; CHECK: # %bb.0: -; CHECK-NEXT: xori 3, 3, 65535 -; CHECK-NEXT: xoris 3, 3, 65535 -; CHECK-NEXT: clrldi 3, 3, 63 -; CHECK-NEXT: ori 3, 3, 26 +; CHECK-NEXT: rlwinm 3, 3, 0, 31, 31 +; CHECK-NEXT: subfic 3, 3, 27 ; CHECK-NEXT: blr %a = and i8 %x, 1 %c = icmp eq i8 %a, 0 @@ -76,10 +70,8 @@ define i8 @add_zext_cmp_mask_narrower_result(i32 %x) { ; CHECK-LABEL: add_zext_cmp_mask_narrower_result: ; CHECK: # %bb.0: -; CHECK-NEXT: xori 3, 3, 65535 -; CHECK-NEXT: xoris 3, 3, 65535 -; CHECK-NEXT: clrldi 3, 3, 63 -; CHECK-NEXT: ori 3, 3, 42 +; CHECK-NEXT: rlwinm 3, 3, 0, 31, 31 +; CHECK-NEXT: subfic 3, 3, 43 ; CHECK-NEXT: blr %a = and i32 %x, 1 %c = icmp eq i32 %a, 0 @@ -130,11 +122,8 @@ define i8 @low_bit_select_constants_bigger_true_same_size_result(i8 %x) { ; CHECK-LABEL: low_bit_select_constants_bigger_true_same_size_result: ; CHECK: # %bb.0: -; CHECK-NEXT: xori 3, 3, 65535 -; CHECK-NEXT: xoris 3, 3, 65535 -; CHECK-NEXT: clrldi 3, 3, 63 -; CHECK-NEXT: ori 3, 3, 65506 -; CHECK-NEXT: oris 3, 3, 65535 +; CHECK-NEXT: rlwinm 3, 3, 0, 31, 31 +; CHECK-NEXT: subfic 3, 3, -29 ; CHECK-NEXT: blr %a = and i8 %x, 1 %c = icmp eq i8 %a, 0 @@ -145,9 +134,8 @@ define i32 @low_bit_select_constants_bigger_true_wider_result(i8 %x) { ; CHECK-LABEL: low_bit_select_constants_bigger_true_wider_result: ; CHECK: # %bb.0: -; CHECK-NEXT: not 3, 3 ; CHECK-NEXT: clrldi 3, 3, 63 -; CHECK-NEXT: ori 3, 3, 226 +; CHECK-NEXT: subfic 3, 3, 227 ; CHECK-NEXT: blr %a = and i8 %x, 1 %c = icmp eq i8 %a, 0 @@ -158,10 +146,8 @@ define i8 @low_bit_select_constants_bigger_true_narrower_result(i16 %x) { 
; CHECK-LABEL: low_bit_select_constants_bigger_true_narrower_result: ; CHECK: # %bb.0: -; CHECK-NEXT: xori 3, 3, 65535 -; CHECK-NEXT: xoris 3, 3, 65535 -; CHECK-NEXT: clrldi 3, 3, 63 -; CHECK-NEXT: ori 3, 3, 40 +; CHECK-NEXT: rlwinm 3, 3, 0, 31, 31 +; CHECK-NEXT: subfic 3, 3, 41 ; CHECK-NEXT: blr %a = and i16 %x, 1 %c = icmp eq i16 %a, 0 Index: test/CodeGen/X86/bool-math.ll =================================================================== --- test/CodeGen/X86/bool-math.ll +++ test/CodeGen/X86/bool-math.ll @@ -4,10 +4,9 @@ define i32 @sub_zext_cmp_mask_same_size_result(i32 %x) { ; CHECK-LABEL: sub_zext_cmp_mask_same_size_result: ; CHECK: # %bb.0: +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi ; CHECK-NEXT: andl $1, %edi -; CHECK-NEXT: cmpl $1, %edi -; CHECK-NEXT: movl $-27, %eax -; CHECK-NEXT: sbbl $0, %eax +; CHECK-NEXT: leal -28(%rdi), %eax ; CHECK-NEXT: retq %a = and i32 %x, 1 %c = icmp eq i32 %a, 0 @@ -19,10 +18,9 @@ define i32 @sub_zext_cmp_mask_wider_result(i8 %x) { ; CHECK-LABEL: sub_zext_cmp_mask_wider_result: ; CHECK: # %bb.0: -; CHECK-NEXT: andb $1, %dil -; CHECK-NEXT: cmpb $1, %dil -; CHECK-NEXT: movl $27, %eax -; CHECK-NEXT: sbbl $0, %eax +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: andl $1, %edi +; CHECK-NEXT: leal 26(%rdi), %eax ; CHECK-NEXT: retq %a = and i8 %x, 1 %c = icmp eq i8 %a, 0 @@ -35,9 +33,8 @@ ; CHECK-LABEL: sub_zext_cmp_mask_narrower_result: ; CHECK: # %bb.0: ; CHECK-NEXT: andl $1, %edi -; CHECK-NEXT: cmpl $1, %edi -; CHECK-NEXT: movb $47, %al -; CHECK-NEXT: sbbb $0, %al +; CHECK-NEXT: orb $46, %dil +; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: retq %a = and i32 %x, 1 %c = icmp eq i32 %a, 0 @@ -49,9 +46,9 @@ define i8 @add_zext_cmp_mask_same_size_result(i8 %x) { ; CHECK-LABEL: add_zext_cmp_mask_same_size_result: ; CHECK: # %bb.0: -; CHECK-NEXT: testb $1, %dil -; CHECK-NEXT: sete %al -; CHECK-NEXT: orb $26, %al +; CHECK-NEXT: andb $1, %dil +; CHECK-NEXT: movb $27, %al +; CHECK-NEXT: subb %dil, %al ; CHECK-NEXT: 
retq %a = and i8 %x, 1 %c = icmp eq i8 %a, 0 @@ -63,10 +60,9 @@ define i32 @add_zext_cmp_mask_wider_result(i8 %x) { ; CHECK-LABEL: add_zext_cmp_mask_wider_result: ; CHECK: # %bb.0: -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: testb $1, %dil -; CHECK-NEXT: sete %al -; CHECK-NEXT: orl $26, %eax +; CHECK-NEXT: andl $1, %edi +; CHECK-NEXT: movl $27, %eax +; CHECK-NEXT: subl %edi, %eax ; CHECK-NEXT: retq %a = and i8 %x, 1 %c = icmp eq i8 %a, 0 @@ -78,9 +74,9 @@ define i8 @add_zext_cmp_mask_narrower_result(i32 %x) { ; CHECK-LABEL: add_zext_cmp_mask_narrower_result: ; CHECK: # %bb.0: -; CHECK-NEXT: testb $1, %dil -; CHECK-NEXT: sete %al -; CHECK-NEXT: orb $42, %al +; CHECK-NEXT: andl $1, %edi +; CHECK-NEXT: movb $43, %al +; CHECK-NEXT: subb %dil, %al ; CHECK-NEXT: retq %a = and i32 %x, 1 %c = icmp eq i32 %a, 0 @@ -92,10 +88,9 @@ define i32 @low_bit_select_constants_bigger_false_same_size_result(i32 %x) { ; CHECK-LABEL: low_bit_select_constants_bigger_false_same_size_result: ; CHECK: # %bb.0: +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi ; CHECK-NEXT: andl $1, %edi -; CHECK-NEXT: cmpl $1, %edi -; CHECK-NEXT: movl $43, %eax -; CHECK-NEXT: sbbl $0, %eax +; CHECK-NEXT: leal 42(%rdi), %eax ; CHECK-NEXT: retq %a = and i32 %x, 1 %c = icmp eq i32 %a, 0 @@ -106,10 +101,9 @@ define i64 @low_bit_select_constants_bigger_false_wider_result(i32 %x) { ; CHECK-LABEL: low_bit_select_constants_bigger_false_wider_result: ; CHECK: # %bb.0: +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi ; CHECK-NEXT: andl $1, %edi -; CHECK-NEXT: cmpl $1, %edi -; CHECK-NEXT: movl $27, %eax -; CHECK-NEXT: sbbq $0, %rax +; CHECK-NEXT: leaq 26(%rdi), %rax ; CHECK-NEXT: retq %a = and i32 %x, 1 %c = icmp eq i32 %a, 0 @@ -120,10 +114,10 @@ define i16 @low_bit_select_constants_bigger_false_narrower_result(i32 %x) { ; CHECK-LABEL: low_bit_select_constants_bigger_false_narrower_result: ; CHECK: # %bb.0: +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi ; CHECK-NEXT: andl $1, %edi -; CHECK-NEXT: cmpl 
$1, %edi -; CHECK-NEXT: movw $37, %ax -; CHECK-NEXT: sbbw $0, %ax +; CHECK-NEXT: leal 36(%rdi), %eax +; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %a = and i32 %x, 1 %c = icmp eq i32 %a, 0 @@ -134,9 +128,9 @@ define i8 @low_bit_select_constants_bigger_true_same_size_result(i8 %x) { ; CHECK-LABEL: low_bit_select_constants_bigger_true_same_size_result: ; CHECK: # %bb.0: -; CHECK-NEXT: testb $1, %dil -; CHECK-NEXT: sete %al -; CHECK-NEXT: orb $-30, %al +; CHECK-NEXT: andb $1, %dil +; CHECK-NEXT: movb $-29, %al +; CHECK-NEXT: subb %dil, %al ; CHECK-NEXT: retq %a = and i8 %x, 1 %c = icmp eq i8 %a, 0 @@ -147,10 +141,9 @@ define i32 @low_bit_select_constants_bigger_true_wider_result(i8 %x) { ; CHECK-LABEL: low_bit_select_constants_bigger_true_wider_result: ; CHECK: # %bb.0: -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: testb $1, %dil -; CHECK-NEXT: sete %al -; CHECK-NEXT: orl $226, %eax +; CHECK-NEXT: andl $1, %edi +; CHECK-NEXT: movl $227, %eax +; CHECK-NEXT: subl %edi, %eax ; CHECK-NEXT: retq %a = and i8 %x, 1 %c = icmp eq i8 %a, 0 @@ -161,9 +154,9 @@ define i8 @low_bit_select_constants_bigger_true_narrower_result(i16 %x) { ; CHECK-LABEL: low_bit_select_constants_bigger_true_narrower_result: ; CHECK: # %bb.0: -; CHECK-NEXT: testb $1, %dil -; CHECK-NEXT: sete %al -; CHECK-NEXT: orb $40, %al +; CHECK-NEXT: andl $1, %edi +; CHECK-NEXT: movb $41, %al +; CHECK-NEXT: subb %dil, %al ; CHECK-NEXT: retq %a = and i16 %x, 1 %c = icmp eq i16 %a, 0