diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2295,9 +2295,6 @@ } // add (sext i1 X), 1 -> zext (not i1 X) - // We don't transform this pattern: - // add (zext i1 X), -1 -> sext (not i1 X) - // because most (?) targets generate better code for the zext form. if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() && isOneOrOneSplat(N1)) { SDValue X = N0.getOperand(0); @@ -2310,6 +2307,27 @@ } } + // We transform this pattern: + // add (zext i1 X), -1 -> sext (not i1 X) + // in the opposite direction because most targets generate better code for + // the zext form. + // TODO: Add a TLI method if any target prefers the opposite fold. + // + // The exception is when creating the sext would allow us to fold away the + // not. + if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() && + isAllOnesOrAllOnesSplat(N1)) { + SDValue X = N0.getOperand(0); + if ((!LegalOperations || + (TLI.isOperationLegal(ISD::XOR, X.getValueType()) && + TLI.isOperationLegal(ISD::SIGN_EXTEND, VT))) && + X.getScalarValueSizeInBits() == 1) { + SDValue Not = DAG.getNOT(DL, X, X.getValueType()); + if (SDValue Xor = visitXOR(Not.getNode())) + return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Xor); + } + } + // Fold (add (or x, c0), c1) -> (add x, (c0 + c1)) if (or x, c0) is // equivalent to (add x, c0). if (N0.getOpcode() == ISD::OR && @@ -10478,6 +10496,15 @@ return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT)); } + // fold sext (not i1 X) -> add (zext i1 X), -1 + if (N0.getOpcode() == ISD::XOR && N0.hasOneUse() && + N0.getScalarValueSizeInBits() == 1 && isOneOrOneSplat(N0.getOperand(1)) && + (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) && + TLI.isOperationLegal(ISD::ADD, VT)))) { + SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)); + return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT)); + } + return SDValue(); } diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -46987,7 +46987,6 @@ const X86Subtarget &Subtarget) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - EVT InVT = N0.getValueType(); SDLoc DL(N); // (i32 (sext (i8 (x86isd::setcc_carry)))) -> (i32 (x86isd::setcc_carry)) @@ -47016,16 +47015,6 @@ if (SDValue V = combineExtSetcc(N, DAG, Subtarget)) return V; - if (InVT == MVT::i1 && N0.getOpcode() == ISD::XOR && - isAllOnesConstant(N0.getOperand(1)) && N0.hasOneUse()) { - // Invert and sign-extend a boolean is the same as zero-extend and subtract - // 1 because 0 becomes -1 and 1 becomes 0. The subtract is efficiently - // lowered with an LEA or a DEC. This is the same as: select Bool, 0, -1. - // sext (xor Bool, -1) --> sub (zext Bool), 1 - SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)); - return DAG.getNode(ISD::SUB, DL, VT, Zext, DAG.getConstant(1, DL, VT)); - } - if (SDValue V = combineToExtendBoolVectorInReg(N, DAG, DCI, Subtarget)) return V; diff --git a/llvm/test/CodeGen/AArch64/select_const.ll b/llvm/test/CodeGen/AArch64/select_const.ll --- a/llvm/test/CodeGen/AArch64/select_const.ll +++ b/llvm/test/CodeGen/AArch64/select_const.ll @@ -68,8 +68,8 @@ define i32 @select_0_or_neg1(i1 %cond) { ; CHECK-LABEL: select_0_or_neg1: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn w8, w0 -; CHECK-NEXT: sbfx w0, w8, #0, #1 +; CHECK-NEXT: and w8, w0, #0x1 +; CHECK-NEXT: sub w0, w8, #1 // =1 ; CHECK-NEXT: ret %sel = select i1 %cond, i32 0, i32 -1 ret i32 %sel @@ -78,8 +78,7 @@ define i32 @select_0_or_neg1_zeroext(i1 zeroext %cond) { ; CHECK-LABEL: select_0_or_neg1_zeroext: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn w8, w0 -; CHECK-NEXT: sbfx w0, w8, #0, #1 +; CHECK-NEXT: sub w0, w0, #1 // =1 ; CHECK-NEXT: ret %sel = select i1 %cond, i32 0, i32 -1 ret i32 %sel diff --git a/llvm/test/CodeGen/ARM/select_const.ll b/llvm/test/CodeGen/ARM/select_const.ll --- a/llvm/test/CodeGen/ARM/select_const.ll +++ b/llvm/test/CodeGen/ARM/select_const.ll @@ -137,23 +137,21 @@ define i32 @select_0_or_neg1(i1 %cond) { ; ARM-LABEL: select_0_or_neg1: ; ARM: @ %bb.0: -; ARM-NEXT: mov r1, #1 -; ARM-NEXT: bic r0, r1, r0 -; ARM-NEXT: rsb r0, r0, #0 +; ARM-NEXT: and r0, r0, #1 +; ARM-NEXT: sub r0, r0, #1 ; ARM-NEXT: mov pc, lr ; ; THUMB2-LABEL: select_0_or_neg1: ; THUMB2: @ %bb.0: -; THUMB2-NEXT: movs r1, #1 -; THUMB2-NEXT: bic.w r0, r1, r0 -; THUMB2-NEXT: rsbs r0, r0, #0 +; THUMB2-NEXT: and r0, r0, #1 +; THUMB2-NEXT: subs r0, #1 ; THUMB2-NEXT: bx lr ; ; THUMB-LABEL: select_0_or_neg1: ; THUMB: @ %bb.0: ; THUMB-NEXT: movs r1, #1 -; THUMB-NEXT: bics r1, r0 -; THUMB-NEXT: rsbs r0, r1, #0 +; THUMB-NEXT: ands r1, r0 +; THUMB-NEXT: subs r0, r1, #1 ; THUMB-NEXT: bx lr %sel = select i1 %cond, i32 0, i32 -1 ret i32 %sel @@ -162,21 +160,17 @@ define i32 @select_0_or_neg1_zeroext(i1 zeroext %cond) { ; ARM-LABEL: select_0_or_neg1_zeroext: ; ARM: @ %bb.0: -; ARM-NEXT: eor r0, r0, #1 -; ARM-NEXT: rsb r0, r0, #0 +; ARM-NEXT: sub r0, r0, #1 ; ARM-NEXT: mov pc, lr ; ; THUMB2-LABEL: select_0_or_neg1_zeroext: ; THUMB2: @ %bb.0: -; THUMB2-NEXT: eor r0, r0, #1 -; THUMB2-NEXT: rsbs r0, r0, #0 +; THUMB2-NEXT: subs r0, #1 ; THUMB2-NEXT: bx lr ; ; THUMB-LABEL: select_0_or_neg1_zeroext: ; THUMB: @ %bb.0: -; THUMB-NEXT: movs r1, #1 -; THUMB-NEXT: eors r1, r0 -; THUMB-NEXT: rsbs r0, r1, #0 +; THUMB-NEXT: subs r0, r0, #1 ; THUMB-NEXT: bx lr %sel = select i1 %cond, i32 0, i32 -1 ret i32 %sel diff --git a/llvm/test/CodeGen/PowerPC/select_const.ll b/llvm/test/CodeGen/PowerPC/select_const.ll --- a/llvm/test/CodeGen/PowerPC/select_const.ll +++ b/llvm/test/CodeGen/PowerPC/select_const.ll @@ -69,9 +69,8 @@ define i32 @select_0_or_neg1(i1 %cond) { ; ALL-LABEL: select_0_or_neg1: ; ALL: # %bb.0: -; ALL-NEXT: not 3, 3 ; ALL-NEXT: clrldi 3, 3, 63 -; ALL-NEXT: neg 3, 3 +; ALL-NEXT: addi 3, 3, -1 ; ALL-NEXT: blr %sel = select i1 %cond, i32 0, i32 -1 ret i32 %sel @@ -80,8 +79,7 @@ define i32 @select_0_or_neg1_zeroext(i1 zeroext %cond) { ; ALL-LABEL: select_0_or_neg1_zeroext: ; ALL: # %bb.0: -; ALL-NEXT: xori 3, 3, 1 -; ALL-NEXT: neg 3, 3 +; ALL-NEXT: addi 3, 3, -1 ; ALL-NEXT: blr %sel = select i1 %cond, i32 0, i32 -1 ret i32 %sel diff --git a/llvm/test/CodeGen/SystemZ/sext-zext.ll b/llvm/test/CodeGen/SystemZ/sext-zext.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/sext-zext.ll @@ -0,0 +1,44 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +;; fold (sext (not x)) -> (add (zext x) -1) +define i32 @sext_of_not(i1 %x) { +; CHECK-LABEL: sext_of_not: +; CHECK: # %bb.0: +; CHECK-NEXT: nilf %r2, 1 +; CHECK-NEXT: ahi %r2, -1 +; CHECK-NEXT: br %r14 + %xor = xor i1 %x, 1 + %sext = sext i1 %xor to i32 + ret i32 %sext +} + +;; fold (sext (not (setcc a, b, cc))) -> (sext (setcc a, b, !cc)) +define i32 @sext_of_not_cmp(i32 %x) { +; CHECK-LABEL: sext_of_not_cmp: +; CHECK: # %bb.0: +; CHECK-NEXT: chi %r2, 7 +; CHECK-NEXT: ipm %r2 +; CHECK-NEXT: afi %r2, 1879048192 +; CHECK-NEXT: sra %r2, 31 +; CHECK-NEXT: br %r14 + %cmp = icmp eq i32 %x, 7 + %xor = xor i1 %cmp, 1 + %sext = sext i1 %xor to i32 + ret i32 %sext +} + +;; fold (add (zext (setcc a, b, cc)), -1) -> (sext (setcc a, b, !cc)) +define i32 @dec_of_zexted_cmp(i32 %x) { +; CHECK-LABEL: dec_of_zexted_cmp: +; CHECK: # %bb.0: +; CHECK-NEXT: chi %r2, 7 +; CHECK-NEXT: ipm %r2 +; CHECK-NEXT: afi %r2, 1879048192 +; CHECK-NEXT: sra %r2, 31 +; CHECK-NEXT: br %r14 + %cmp = icmp eq i32 %x, 7 + %zext = zext i1 %cmp to i32 + %dec = sub i32 %zext, 1 + ret i32 %dec +} diff --git a/llvm/test/CodeGen/X86/pr44140.ll b/llvm/test/CodeGen/X86/pr44140.ll --- a/llvm/test/CodeGen/X86/pr44140.ll +++ b/llvm/test/CodeGen/X86/pr44140.ll @@ -49,8 +49,8 @@ ; CHECK-NEXT: movabsq $1010101010101010101, %rcx # imm = 0xE04998456557EB5 ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: cmpq %rcx, {{[0-9]+}}(%rsp) -; CHECK-NEXT: sete %al -; CHECK-NEXT: decl %eax +; CHECK-NEXT: setne %al +; CHECK-NEXT: negl %eax ; CHECK-NEXT: addq $584, %rsp # imm = 0x248 ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq