Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2295,9 +2295,6 @@ } // add (sext i1 X), 1 -> zext (not i1 X) - // We don't transform this pattern: - // add (zext i1 X), -1 -> sext (not i1 X) - // because most (?) targets generate better code for the zext form. if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() && isOneOrOneSplat(N1)) { SDValue X = N0.getOperand(0); @@ -2310,6 +2307,27 @@ } } + // We transform this pattern: + // add (zext i1 X), -1 -> sext (not i1 X) + // in the opposite direction because most targets generate better code for + // the zext form. + // TODO: Add a TLI method if any target prefers the opposite fold. + // + // The exception is when creating the sext would allow us to fold away the + // not. + if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() && + isAllOnesOrAllOnesSplat(N1)) { + SDValue X = N0.getOperand(0); + if ((!LegalOperations || + (TLI.isOperationLegal(ISD::XOR, X.getValueType()) && + TLI.isOperationLegal(ISD::SIGN_EXTEND, VT))) && + X.getScalarValueSizeInBits() == 1) { + SDValue Not = DAG.getNOT(DL, X, X.getValueType()); + if (SDValue Xor = visitXOR(Not.getNode())) + return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Xor); + } + } + // Fold (add (or x, c0), c1) -> (add x, (c0 + c1)) if (or x, c0) is // equivalent to (add x, c0). if (N0.getOpcode() == ISD::OR && @@ -10561,6 +10579,15 @@ return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT)); } + // fold sext (not i1 X) -> add (zext i1 X), -1 + if (N0.getOpcode() == ISD::XOR && N0.hasOneUse() && + N0.getScalarValueSizeInBits() == 1 && isOneOrOneSplat(N0.getOperand(1)) && + (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) && + TLI.isOperationLegal(ISD::ADD, VT)))) { + SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)); + return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT)); + } + return SDValue(); } Index: llvm/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/lib/Target/X86/X86ISelLowering.cpp +++ llvm/lib/Target/X86/X86ISelLowering.cpp @@ -47018,7 +47018,6 @@ const X86Subtarget &Subtarget) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - EVT InVT = N0.getValueType(); SDLoc DL(N); // (i32 (sext (i8 (x86isd::setcc_carry)))) -> (i32 (x86isd::setcc_carry)) @@ -47047,16 +47046,6 @@ if (SDValue V = combineExtSetcc(N, DAG, Subtarget)) return V; - if (InVT == MVT::i1 && N0.getOpcode() == ISD::XOR && - isAllOnesConstant(N0.getOperand(1)) && N0.hasOneUse()) { - // Invert and sign-extend a boolean is the same as zero-extend and subtract - // 1 because 0 becomes -1 and 1 becomes 0. The subtract is efficiently - // lowered with an LEA or a DEC. This is the same as: select Bool, 0, -1. - // sext (xor Bool, -1) --> sub (zext Bool), 1 - SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)); - return DAG.getNode(ISD::SUB, DL, VT, Zext, DAG.getConstant(1, DL, VT)); - } - if (SDValue V = combineToExtendBoolVectorInReg(N, DAG, DCI, Subtarget)) return V; Index: llvm/test/CodeGen/AArch64/select_const.ll =================================================================== --- llvm/test/CodeGen/AArch64/select_const.ll +++ llvm/test/CodeGen/AArch64/select_const.ll @@ -68,8 +68,8 @@ define i32 @select_0_or_neg1(i1 %cond) { ; CHECK-LABEL: select_0_or_neg1: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn w8, w0 -; CHECK-NEXT: sbfx w0, w8, #0, #1 +; CHECK-NEXT: and w8, w0, #0x1 +; CHECK-NEXT: sub w0, w8, #1 // =1 ; CHECK-NEXT: ret %sel = select i1 %cond, i32 0, i32 -1 ret i32 %sel @@ -78,8 +78,7 @@ define i32 @select_0_or_neg1_zeroext(i1 zeroext %cond) { ; CHECK-LABEL: select_0_or_neg1_zeroext: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn w8, w0 -; CHECK-NEXT: sbfx w0, w8, #0, #1 +; CHECK-NEXT: sub w0, w0, #1 // =1 ; CHECK-NEXT: ret %sel = select i1 %cond, i32 0, i32 -1 ret i32 %sel Index: llvm/test/CodeGen/ARM/select_const.ll =================================================================== --- llvm/test/CodeGen/ARM/select_const.ll +++ llvm/test/CodeGen/ARM/select_const.ll @@ -137,23 +137,21 @@ define i32 @select_0_or_neg1(i1 %cond) { ; ARM-LABEL: select_0_or_neg1: ; ARM: @ %bb.0: -; ARM-NEXT: mov r1, #1 -; ARM-NEXT: bic r0, r1, r0 -; ARM-NEXT: rsb r0, r0, #0 +; ARM-NEXT: and r0, r0, #1 +; ARM-NEXT: sub r0, r0, #1 ; ARM-NEXT: mov pc, lr ; ; THUMB2-LABEL: select_0_or_neg1: ; THUMB2: @ %bb.0: -; THUMB2-NEXT: movs r1, #1 -; THUMB2-NEXT: bic.w r0, r1, r0 -; THUMB2-NEXT: rsbs r0, r0, #0 +; THUMB2-NEXT: and r0, r0, #1 +; THUMB2-NEXT: subs r0, #1 ; THUMB2-NEXT: bx lr ; ; THUMB-LABEL: select_0_or_neg1: ; THUMB: @ %bb.0: ; THUMB-NEXT: movs r1, #1 -; THUMB-NEXT: bics r1, r0 -; THUMB-NEXT: rsbs r0, r1, #0 +; THUMB-NEXT: ands r1, r0 +; THUMB-NEXT: subs r0, r1, #1 ; THUMB-NEXT: bx lr %sel = select i1 %cond, i32 0, i32 -1 ret i32 %sel @@ -162,21 +160,17 @@ define i32 @select_0_or_neg1_zeroext(i1 zeroext %cond) { ; ARM-LABEL: select_0_or_neg1_zeroext: ; ARM: @ %bb.0: -; ARM-NEXT: eor r0, r0, #1 -; ARM-NEXT: rsb r0, r0, #0 +; ARM-NEXT: sub r0, r0, #1 ; ARM-NEXT: mov pc, lr ; ; THUMB2-LABEL: select_0_or_neg1_zeroext: ; THUMB2: @ %bb.0: -; THUMB2-NEXT: eor r0, r0, #1 -; THUMB2-NEXT: rsbs r0, r0, #0 +; THUMB2-NEXT: subs r0, #1 ; THUMB2-NEXT: bx lr ; ; THUMB-LABEL: select_0_or_neg1_zeroext: ; THUMB: @ %bb.0: -; THUMB-NEXT: movs r1, #1 -; THUMB-NEXT: eors r1, r0 -; THUMB-NEXT: rsbs r0, r1, #0 +; THUMB-NEXT: subs r0, r0, #1 ; THUMB-NEXT: bx lr %sel = select i1 %cond, i32 0, i32 -1 ret i32 %sel Index: llvm/test/CodeGen/PowerPC/select_const.ll =================================================================== --- llvm/test/CodeGen/PowerPC/select_const.ll +++ llvm/test/CodeGen/PowerPC/select_const.ll @@ -69,9 +69,8 @@ define i32 @select_0_or_neg1(i1 %cond) { ; ALL-LABEL: select_0_or_neg1: ; ALL: # %bb.0: -; ALL-NEXT: not 3, 3 ; ALL-NEXT: clrldi 3, 3, 63 -; ALL-NEXT: neg 3, 3 +; ALL-NEXT: addi 3, 3, -1 ; ALL-NEXT: blr %sel = select i1 %cond, i32 0, i32 -1 ret i32 %sel @@ -80,8 +79,7 @@ define i32 @select_0_or_neg1_zeroext(i1 zeroext %cond) { ; ALL-LABEL: select_0_or_neg1_zeroext: ; ALL: # %bb.0: -; ALL-NEXT: xori 3, 3, 1 -; ALL-NEXT: neg 3, 3 +; ALL-NEXT: addi 3, 3, -1 ; ALL-NEXT: blr %sel = select i1 %cond, i32 0, i32 -1 ret i32 %sel Index: llvm/test/CodeGen/RISCV/sext-zext-trunc.ll =================================================================== --- llvm/test/CodeGen/RISCV/sext-zext-trunc.ll +++ llvm/test/CodeGen/RISCV/sext-zext-trunc.ll @@ -437,20 +437,18 @@ ret i32 %1 } -;; TODO: fold (sext (not x)) -> (add (zext x) -1) +;; fold (sext (not x)) -> (add (zext x) -1) define i32 @sext_of_not_i32(i1 %x) { ; RV32I-LABEL: sext_of_not_i32: ; RV32I: # %bb.0: -; RV32I-NEXT: not a0, a0 ; RV32I-NEXT: andi a0, a0, 1 -; RV32I-NEXT: neg a0, a0 +; RV32I-NEXT: addi a0, a0, -1 ; RV32I-NEXT: ret ; ; RV64I-LABEL: sext_of_not_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: not a0, a0 ; RV64I-NEXT: andi a0, a0, 1 -; RV64I-NEXT: neg a0, a0 +; RV64I-NEXT: addi a0, a0, -1 ; RV64I-NEXT: ret %xor = xor i1 %x, 1 %sext = sext i1 %xor to i32 @@ -460,24 +458,23 @@ define i64 @sext_of_not_i64(i1 %x) { ; RV32I-LABEL: sext_of_not_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: not a0, a0 -; RV32I-NEXT: andi a0, a0, 1 -; RV32I-NEXT: neg a0, a0 -; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: andi a1, a0, 1 +; RV32I-NEXT: addi a0, a1, -1 +; RV32I-NEXT: sltu a1, a0, a1 +; RV32I-NEXT: addi a1, a1, -1 ; RV32I-NEXT: ret ; ; RV64I-LABEL: sext_of_not_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: not a0, a0 ; RV64I-NEXT: andi a0, a0, 1 -; RV64I-NEXT: neg a0, a0 +; RV64I-NEXT: addi a0, a0, -1 ; RV64I-NEXT: ret %xor = xor i1 %x, 1 %sext = sext i1 %xor to i64 ret i64 %sext } -;; TODO: fold (sext (not (setcc a, b, cc))) -> (sext (setcc a, b, !cc)) +;; fold (sext (not (setcc a, b, cc))) -> (sext (setcc a, b, !cc)) define i32 @sext_of_not_cmp_i32(i32 %x) { ; RV32I-LABEL: sext_of_not_cmp_i32: ; RV32I: # %bb.0: @@ -522,13 +519,13 @@ ret i64 %sext } -;; TODO: fold (add (zext (setcc a, b, cc)), -1) -> (sext (setcc a, b, !cc)) +;; fold (add (zext (setcc a, b, cc)), -1) -> (sext (setcc a, b, !cc)) define i32 @dec_of_zexted_cmp_i32(i32 %x) { ; RV32I-LABEL: dec_of_zexted_cmp_i32: ; RV32I: # %bb.0: ; RV32I-NEXT: addi a0, a0, -7 -; RV32I-NEXT: seqz a0, a0 -; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: snez a0, a0 +; RV32I-NEXT: neg a0, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: dec_of_zexted_cmp_i32: @@ -536,8 +533,8 @@ ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: srli a0, a0, 32 ; RV64I-NEXT: addi a0, a0, -7 -; RV64I-NEXT: seqz a0, a0 -; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: snez a0, a0 +; RV64I-NEXT: neg a0, a0 ; RV64I-NEXT: ret %cmp = icmp eq i32 %x, 7 %zext = zext i1 %cmp to i32 @@ -550,17 +547,16 @@ ; RV32I: # %bb.0: ; RV32I-NEXT: xori a0, a0, 7 ; RV32I-NEXT: or a0, a0, a1 -; RV32I-NEXT: seqz a1, a0 -; RV32I-NEXT: addi a0, a1, -1 -; RV32I-NEXT: sltu a1, a0, a1 -; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: snez a0, a0 +; RV32I-NEXT: neg a0, a0 +; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: dec_of_zexted_cmp_i64: ; RV64I: # %bb.0: ; RV64I-NEXT: addi a0, a0, -7 -; RV64I-NEXT: seqz a0, a0 -; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: snez a0, a0 +; RV64I-NEXT: neg a0, a0 ; RV64I-NEXT: ret %cmp = icmp eq i64 %x, 7 %zext = zext i1 %cmp to i64 Index: llvm/test/CodeGen/SystemZ/sext-zext.ll =================================================================== --- llvm/test/CodeGen/SystemZ/sext-zext.ll +++ llvm/test/CodeGen/SystemZ/sext-zext.ll @@ -1,20 +1,19 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -;; TODO: fold (sext (not x)) -> (add (zext x) -1) +;; fold (sext (not x)) -> (add (zext x) -1) define i32 @sext_of_not(i1 %x) { ; CHECK-LABEL: sext_of_not: ; CHECK: # %bb.0: -; CHECK-NEXT: xilf %r2, 4294967295 ; CHECK-NEXT: nilf %r2, 1 -; CHECK-NEXT: lcr %r2, %r2 +; CHECK-NEXT: ahi %r2, -1 ; CHECK-NEXT: br %r14 %xor = xor i1 %x, 1 %sext = sext i1 %xor to i32 ret i32 %sext } -;; TODO: fold (sext (not (setcc a, b, cc))) -> (sext (setcc a, b, !cc)) +;; fold (sext (not (setcc a, b, cc))) -> (sext (setcc a, b, !cc)) define i32 @sext_of_not_cmp(i32 %x) { ; CHECK-LABEL: sext_of_not_cmp: ; CHECK: # %bb.0: @@ -29,15 +28,14 @@ ret i32 %sext } -;; TODO: fold (add (zext (setcc a, b, cc)), -1) -> (sext (setcc a, b, !cc)) +;; fold (add (zext (setcc a, b, cc)), -1) -> (sext (setcc a, b, !cc)) define i32 @dec_of_zexted_cmp(i32 %x) { ; CHECK-LABEL: dec_of_zexted_cmp: ; CHECK: # %bb.0: ; CHECK-NEXT: chi %r2, 7 ; CHECK-NEXT: ipm %r2 -; CHECK-NEXT: afi %r2, -268435456 -; CHECK-NEXT: srl %r2, 31 -; CHECK-NEXT: ahi %r2, -1 +; CHECK-NEXT: afi %r2, 1879048192 +; CHECK-NEXT: sra %r2, 31 ; CHECK-NEXT: br %r14 %cmp = icmp eq i32 %x, 7 %zext = zext i1 %cmp to i32 Index: llvm/test/CodeGen/X86/pr44140.ll =================================================================== --- llvm/test/CodeGen/X86/pr44140.ll +++ llvm/test/CodeGen/X86/pr44140.ll @@ -49,8 +49,8 @@ ; CHECK-NEXT: movabsq $1010101010101010101, %rcx # imm = 0xE04998456557EB5 ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: cmpq %rcx, {{[0-9]+}}(%rsp) -; CHECK-NEXT: sete %al -; CHECK-NEXT: decl %eax +; CHECK-NEXT: setne %al +; CHECK-NEXT: negl %eax ; CHECK-NEXT: addq $584, %rsp # imm = 0x248 ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq