Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17454,23 +17454,24 @@
   if (N->getOperand(0) == N->getOperand(1))
     return N->getOperand(0);

-  // CSEL 0, cttz, cc -> AND cttz numbits-1
+  // CSEL 0, cttz, cc -> AND cttz bitwidth-1
+  SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
   SDValue N3 = N->getOperand(3);
+  unsigned CC = N->getConstantOperandVal(2);

-  if (N3.getOpcode() == AArch64ISD::SUBS &&
+  if (isNullConstant(N0) && CC == AArch64CC::EQ &&
+      N3.getOpcode() == AArch64ISD::SUBS &&
       isNullConstant(N3.getValue(1).getOperand(1))) {
-    if (N1.getOpcode() == ISD::CTTZ) {
-      SDValue NumBitsMinusOne =
-          DAG.getConstant(31, SDLoc(N), N1.getValueType());
-      return DAG.getNode(ISD::AND, SDLoc(N), N1.getValueType(), N1,
-                         NumBitsMinusOne);
-    } else if (N1.getOpcode() == ISD::TRUNCATE &&
-               N1.getOperand(0).getOpcode() == ISD::CTTZ) {
-      SDValue NumBitsMinusOne =
-          DAG.getConstant(63, SDLoc(N), N1.getValueType());
+    if (N1.getOpcode() == ISD::CTTZ ||
+        (N1.getOpcode() == ISD::TRUNCATE &&
+         N1.getOperand(0).getOpcode() == ISD::CTTZ)) {
+      unsigned BitWidth =
+          cast<ConstantSDNode>(N0)->getConstantIntValue()->getBitWidth();
+      SDValue BitWidthMinusOne =
+          DAG.getConstant(BitWidth - 1, SDLoc(N), N1.getValueType());
       return DAG.getNode(ISD::AND, SDLoc(N), N1.getValueType(), N1,
-                         NumBitsMinusOne);
+                         BitWidthMinusOne);
     }
   }

Index: llvm/test/CodeGen/AArch64/table-based-cttz.ll
===================================================================
--- llvm/test/CodeGen/AArch64/table-based-cttz.ll
+++ llvm/test/CodeGen/AArch64/table-based-cttz.ll
@@ -4,7 +4,8 @@
 ;; CSEL 0, cttz, cc -> AND cttz numbits-1
 ;; for cttz in the case of i32 and i64 respectively

-define i32 @ctz1(i32 %x) {
+;; Cases for which the optimization takes place
+define i32 @cttzi32(i32 %x) {
 ; CHECK: rbit w8, w0
 ; CHECK-NEXT: clz w8, w8
 ; CHECK-NEXT: and w0, w8, #0x1f
@@ -16,19 +17,86 @@
   ret i32 %2
 }

-define i32 @ctz2(i64 %x) {
+define i64 @cttzi64(i64 %x) {
 ; CHECK: rbit x8, x0
 ; CHECK-NEXT: clz x8, x8
-; CHECK-NEXT: and w0, w8, #0x3f
+; CHECK-NEXT: and x0, x8, #0x3f
 ; CHECK-NEXT: ret
 entry:
   %0 = call i64 @llvm.cttz.i64(i64 %x, i1 true)
   %1 = icmp eq i64 %x, 0
-  %2 = trunc i64 %0 to i32
-  %3 = select i1 %1, i32 0, i32 %2
+  %2 = select i1 %1, i64 0, i64 %0
+  ret i64 %2
+}
+
+define i32 @cttztrunc(i64 %x) {
+; CHECK: rbit x8, x0
+; CHECK-NEXT: clz x8, x8
+; CHECK-NEXT: and w0, w8, #0x1f
+; CHECK-NEXT: ret
+entry:
+  %0 = call i64 @llvm.cttz.i64(i64 %x, i1 true)
+  %1 = icmp eq i64 %x, 0
+  %2 = select i1 %1, i64 0, i64 %0
+  %3 = trunc i64 %2 to i32
   ret i32 %3
 }

-declare i32 @llvm.cttz.i32(i32, i1 immarg)
+;; Cases for which the optimization does not take place
+define i32 @cttzne(i32 %x) {
+; CHECK: rbit w8, w0
+; CHECK-NEXT: cmp w0, #0
+; CHECK-NEXT: clz w8, w8
+; CHECK-NEXT: csel w0, wzr, w8, ne
+; CHECK-NEXT: ret
+entry:
+  %0 = call i32 @llvm.cttz.i32(i32 %x, i1 true)
+  %1 = icmp ne i32 %x, 0
+  %2 = select i1 %1, i32 0, i32 %0
+  ret i32 %2
+}
+
+define i32 @cttzxnot0(i32 %x) {
+; CHECK: rbit w8, w0
+; CHECK-NEXT: cmp w0, #10
+; CHECK-NEXT: clz w8, w8
+; CHECK-NEXT: csel w0, wzr, w8, eq
+; CHECK-NEXT: ret
+entry:
+  %0 = call i32 @llvm.cttz.i32(i32 %x, i1 true)
+  %1 = icmp eq i32 %x, 10
+  %2 = select i1 %1, i32 0, i32 %0
+  ret i32 %2
+}
+
+define i32 @cttzopnot0(i32 %x) {
+; CHECK: rbit w9, w0
+; CHECK-NEXT: mov w8, #10
+; CHECK-NEXT: clz w9, w9
+; CHECK-NEXT: cmp w0, #0
+; CHECK-NEXT: csel w0, w8, w9, eq
+; CHECK-NEXT: ret
+entry:
+  %0 = call i32 @llvm.cttz.i32(i32 %x, i1 true)
+  %1 = icmp eq i32 %x, 0
+  %2 = select i1 %1, i32 10, i32 %0
+  ret i32 %2
+}
+
+define i32 @notcttz(i32 %x) {
+; CHECK: clz w8, w0
+; CHECK-NEXT: cmp w0, #0
+; CHECK-NEXT: csel w0, wzr, w8, eq
+; CHECK-NEXT: ret
+entry:
+  %0 = call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+  %1 = icmp eq i32 %x, 0
+  %2 = select i1 %1, i32 0, i32 %0
+  ret i32 %2
+}
+
+declare i32 @llvm.cttz.i32(i32, i1)
+
+declare i64 @llvm.cttz.i64(i64, i1)

-declare i64 @llvm.cttz.i64(i64, i1 immarg)
+declare i32 @llvm.ctlz.i32(i32, i1)
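
Note (reviewer sketch, not part of the patch): the fold relies on the rbit+clz lowering of cttz producing the full bit width for a zero input, and on BitWidth & (BitWidth - 1) == 0 for the power-of-two widths involved, so masking with BitWidth - 1 yields the same value as the CSEL against zero. Below is a minimal standalone C++ check of that identity for the i32 case; the helper name cttz32LikeRbitClz is made up here to stand in for what the rbit+clz sequence computes.

#include <cassert>
#include <cstdint>

// Trailing-zero count with the "zero input yields the bit width" convention,
// mirroring what the AArch64 rbit+clz sequence produces.
static unsigned cttz32LikeRbitClz(uint32_t X) {
  unsigned Count = 0;
  while (Count < 32 && ((X >> Count) & 1) == 0)
    ++Count;
  return Count; // 32 when X == 0
}

int main() {
  const uint32_t Inputs[] = {0u, 1u, 2u, 6u, 0x80000000u, 0xFFFFFFFFu};
  for (uint32_t X : Inputs) {
    // CSEL form: 0 when X == 0, otherwise the cttz result.
    uint32_t SelectForm = (X == 0) ? 0u : cttz32LikeRbitClz(X);
    // AND form produced by the combine: mask with BitWidth - 1 (31 for i32).
    uint32_t MaskedForm = cttz32LikeRbitClz(X) & 31u;
    assert(SelectForm == MaskedForm);
  }
  return 0;
}

The same argument carries over to the i64 case with a mask of 63, which is why the new cttzi64 and cttztrunc tests expect an AND rather than a CMP/CSEL pair.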