Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17454,13 +17454,33 @@
   if (N->getOperand(0) == N->getOperand(1))
     return N->getOperand(0);
 
-  // CSEL cttz, 0, cc -> AND cttz 31
+  // CSEL 0, cttz(X), eq, flags(SUBS X, 0) -> AND cttz(X), numbits-1
+  //
+  // The select only exists to force a result of 0 when X == 0. AArch64 emits
+  // cttz as clz(rbit(X)), which is expected to yield numbits for X == 0;
+  // masking with numbits-1 turns that into 0 and leaves every in-range result
+  // unchanged, so the AND can stand in for the select.
   SDValue N1 = N->getOperand(1);
-  SDValue N2 = N->getOperand(2);
-  bool isZero = cast<ConstantSDNode>(N2.getNode())->isZero();
-  if (N1.getOpcode() == ISD::CTTZ && isZero) {
-    SDValue thirtyOne = DAG.getConstant(31, SDLoc(N), N1.getValueType());
-    return DAG.getNode(ISD::AND, SDLoc(N), N1.getValueType(), N1, thirtyOne);
+  SDValue N3 = N->getOperand(3);
+
+  // Only fold when the select yields the constant 0 on "equal" and the flags
+  // come from a compare against zero.
+  if (isNullConstant(N->getOperand(0)) &&
+      N->getConstantOperandVal(2) == AArch64CC::EQ &&
+      N3.getOpcode() == AArch64ISD::SUBS &&
+      isNullConstant(N3.getOperand(1))) {
+    // The cttz may reach the select through a truncate (i64 cttz, i32 use).
+    SDValue CTTZ = N1.getOpcode() == ISD::TRUNCATE ? N1.getOperand(0) : N1;
+
+    // The value compared against zero must be the value being counted.
+    if (CTTZ.getOpcode() == ISD::CTTZ &&
+        CTTZ.getOperand(0) == N3.getOperand(0)) {
+      // Take the mask from the width of the cttz itself, not of the select.
+      SDValue NumBitsMinusOne = DAG.getConstant(
+          CTTZ.getValueSizeInBits() - 1, SDLoc(N), N1.getValueType());
+      return DAG.getNode(ISD::AND, SDLoc(N), N1.getValueType(), N1,
+                         NumBitsMinusOne);
+    }
   }
 
   return performCONDCombine(N, DCI, DAG, 2, 3);
Index: llvm/test/CodeGen/AArch64/table-based-cttz.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/table-based-cttz.ll
@@ -0,0 +1,36 @@
+; RUN: llc -march=aarch64 < %s | FileCheck %s
+
+;; Check the transformation
+;; CSEL 0, cttz, cc -> AND cttz numbits-1
+;; for cttz in the case of i32 and i64 respectively
+
+define i32 @ctz1(i32 %x) {
+; CHECK-LABEL: ctz1:
+; CHECK: rbit w8, w0
+; CHECK-NEXT: clz w8, w8
+; CHECK-NEXT: and w0, w8, #0x1f
+; CHECK-NEXT: ret
+entry:
+  %0 = call i32 @llvm.cttz.i32(i32 %x, i1 true)
+  %1 = icmp eq i32 %x, 0
+  %2 = select i1 %1, i32 0, i32 %0
+  ret i32 %2
+}
+
+define i32 @ctz2(i64 %x) {
+; CHECK-LABEL: ctz2:
+; CHECK: rbit x8, x0
+; CHECK-NEXT: clz x8, x8
+; CHECK-NEXT: and w0, w8, #0x3f
+; CHECK-NEXT: ret
+entry:
+  %0 = call i64 @llvm.cttz.i64(i64 %x, i1 true)
+  %1 = icmp eq i64 %x, 0
+  %2 = trunc i64 %0 to i32
+  %3 = select i1 %1, i32 0, i32 %2
+  ret i32 %3
+}
+
+declare i32 @llvm.cttz.i32(i32, i1 immarg)
+
+declare i64 @llvm.cttz.i64(i64, i1 immarg)