diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -51872,6 +51872,62 @@ return getSETCC(NewCC, LHS->getOperand(1), DL, DAG); } +static SDValue combineXorSubCTLZ(SDNode *N, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { + assert(N->getOpcode() == ISD::XOR || N->getOpcode() == ISD::SUB); + if (Subtarget.hasFastLZCNT()) + return SDValue(); + + EVT VT = N->getValueType(0); + if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32 && + (VT != MVT::i64 || !Subtarget.is64Bit())) + return SDValue(); + + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + if (N0.getOpcode() != ISD::CTLZ_ZERO_UNDEF && + N1.getOpcode() != ISD::CTLZ_ZERO_UNDEF) + return SDValue(); + + SDValue OpCTLZ; + SDValue OpSizeTM1; + + if (N1.getOpcode() == ISD::CTLZ_ZERO_UNDEF) { + OpCTLZ = N1; + OpSizeTM1 = N0; + } else if (N->getOpcode() == ISD::SUB) { + return SDValue(); + } else { + OpCTLZ = N0; + OpSizeTM1 = N1; + } + + if (!OpCTLZ.hasOneUse()) + return SDValue(); + auto *C = dyn_cast(OpSizeTM1); + if (C == nullptr) + return SDValue(); + + if (C->getZExtValue() != uint64_t(OpCTLZ.getValueSizeInBits() - 1)) + return SDValue(); + SDLoc DL(N); + EVT OpVT = VT; + SDValue Op = OpCTLZ.getOperand(0); + if (VT == MVT::i8) { + // Zero extend to i32 since there is not an i8 bsr. + OpVT = MVT::i32; + Op = DAG.getNode(ISD::ZERO_EXTEND, DL, OpVT, Op); + } + + SDVTList VTs = DAG.getVTList(OpVT, MVT::i32); + Op = DAG.getNode(X86ISD::BSR, DL, VTs, Op); + if (VT == MVT::i8) { + Op = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, Op); + } + return Op; +} + static SDValue combineXor(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { @@ -51899,6 +51955,9 @@ if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, DCI, Subtarget)) return FPLogic; + if (SDValue R = combineXorSubCTLZ(N, DAG, Subtarget)) + return R; + if (DCI.isBeforeLegalizeOps()) return SDValue(); @@ -54784,6 +54843,9 @@ Op1.getOperand(0)); } + if (SDValue V = combineXorSubCTLZ(N, DAG, Subtarget)) + return V; + return combineAddOrSubToADCOrSBB(N, DAG); } diff --git a/llvm/test/CodeGen/X86/clz.ll b/llvm/test/CodeGen/X86/clz.ll --- a/llvm/test/CodeGen/X86/clz.ll +++ b/llvm/test/CodeGen/X86/clz.ll @@ -979,14 +979,12 @@ ; ; X86-CLZ-LABEL: ctlz_bsr: ; X86-CLZ: # %bb.0: -; X86-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax -; X86-CLZ-NEXT: xorl $31, %eax +; X86-CLZ-NEXT: bsrl {{[0-9]+}}(%esp), %eax ; X86-CLZ-NEXT: retl ; ; X64-CLZ-LABEL: ctlz_bsr: ; X64-CLZ: # %bb.0: -; X64-CLZ-NEXT: lzcntl %edi, %eax -; X64-CLZ-NEXT: xorl $31, %eax +; X64-CLZ-NEXT: bsrl %edi, %eax ; X64-CLZ-NEXT: retq ; ; X64-FASTLZCNT-LABEL: ctlz_bsr: @@ -1438,8 +1436,7 @@ ; X86-CLZ-LABEL: PR47603_zext: ; X86-CLZ: # %bb.0: ; X86-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %ecx -; X86-CLZ-NEXT: xorl $31, %ecx +; X86-CLZ-NEXT: bsrl {{[0-9]+}}(%esp), %ecx ; X86-CLZ-NEXT: movsbl (%eax,%ecx), %eax ; X86-CLZ-NEXT: retl ; @@ -1559,18 +1556,14 @@ ; X86-CLZ-LABEL: ctlz_xor7_i8_true: ; X86-CLZ: # %bb.0: ; X86-CLZ-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-CLZ-NEXT: lzcntl %eax, %eax -; X86-CLZ-NEXT: addl $-24, %eax -; X86-CLZ-NEXT: xorb $7, %al +; X86-CLZ-NEXT: bsrl %eax, %eax ; X86-CLZ-NEXT: # kill: def $al killed $al killed $eax ; X86-CLZ-NEXT: retl ; ; X64-CLZ-LABEL: ctlz_xor7_i8_true: ; X64-CLZ: # %bb.0: ; X64-CLZ-NEXT: movzbl %dil, %eax -; X64-CLZ-NEXT: lzcntl %eax, %eax -; X64-CLZ-NEXT: addl $-24, %eax -; X64-CLZ-NEXT: xorb $7, %al +; X64-CLZ-NEXT: bsrl %eax, %eax ; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax ; X64-CLZ-NEXT: retq ; @@ -1685,16 +1678,12 @@ ; ; X86-CLZ-LABEL: ctlz_xor15_i16_true: ; X86-CLZ: # %bb.0: -; X86-CLZ-NEXT: lzcntw {{[0-9]+}}(%esp), %ax -; X86-CLZ-NEXT: xorl $15, %eax -; X86-CLZ-NEXT: # kill: def $ax killed $ax killed $eax +; X86-CLZ-NEXT: bsrw {{[0-9]+}}(%esp), %ax ; X86-CLZ-NEXT: retl ; ; X64-CLZ-LABEL: ctlz_xor15_i16_true: ; X64-CLZ: # %bb.0: -; X64-CLZ-NEXT: lzcntw %di, %ax -; X64-CLZ-NEXT: xorl $15, %eax -; X64-CLZ-NEXT: # kill: def $ax killed $ax killed $eax +; X64-CLZ-NEXT: bsrw %di, %ax ; X64-CLZ-NEXT: retq ; ; X64-FASTLZCNT-LABEL: ctlz_xor15_i16_true: @@ -1895,8 +1884,7 @@ ; ; X64-CLZ-LABEL: ctlz_xor63_i64_true: ; X64-CLZ: # %bb.0: -; X64-CLZ-NEXT: lzcntq %rdi, %rax -; X64-CLZ-NEXT: xorq $63, %rax +; X64-CLZ-NEXT: bsrq %rdi, %rax ; X64-CLZ-NEXT: retq ; ; X64-FASTLZCNT-LABEL: ctlz_xor63_i64_true: