Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17454,23 +17454,43 @@
   if (N->getOperand(0) == N->getOperand(1))
     return N->getOperand(0);
 
-  // CSEL 0, cttz, cc -> AND cttz bitwidth-1
-  SDValue N0 = N->getOperand(0);
-  SDValue N1 = N->getOperand(1);
-  SDValue N3 = N->getOperand(3);
+  // CSEL 0, cttz(X), eq(X, 0) -> AND cttz bitwidth-1
+  // CSEL cttz(X), 0, ne(X, 0) -> AND cttz bitwidth-1
   unsigned CC = N->getConstantOperandVal(2);
-
-  if (isNullConstant(N0) && CC == AArch64CC::EQ &&
-      N3.getOpcode() == AArch64ISD::SUBS &&
-      isNullConstant(N3.getValue(1).getOperand(1))) {
-    if (N1.getOpcode() == ISD::CTTZ ||
-        (N1.getOpcode() == ISD::TRUNCATE &&
-         N1.getOperand(0).getOpcode() == ISD::CTTZ)) {
-      unsigned BitWidth =
-          cast<ConstantSDNode>(N0)->getConstantIntValue()->getBitWidth();
-      SDValue BitWidthMinusOne =
-          DAG.getConstant(BitWidth - 1, SDLoc(N), N1.getValueType());
-      return DAG.getNode(ISD::AND, SDLoc(N), N1.getValueType(), N1,
-                         BitWidthMinusOne);
+  SDValue SUBS = N->getOperand(3);
+  SDValue Zero, CTTZ;
+  bool IsEQOrNE = false;
+
+  if (CC == AArch64CC::EQ && SUBS.getOpcode() == AArch64ISD::SUBS) {
+    Zero = N->getOperand(0);
+    CTTZ = N->getOperand(1);
+    IsEQOrNE = true;
+  } else if (CC == AArch64CC::NE && SUBS.getOpcode() == AArch64ISD::SUBS) {
+    Zero = N->getOperand(1);
+    CTTZ = N->getOperand(0);
+    IsEQOrNE = true;
+  }
+
+  if (IsEQOrNE &&
+      (CTTZ.getOpcode() == ISD::CTTZ ||
+       (CTTZ.getOpcode() == ISD::TRUNCATE &&
+        CTTZ.getOperand(0).getOpcode() == ISD::CTTZ))) {
+    if (isNullConstant(Zero) &&
+        isNullConstant(SUBS.getValue(1).getOperand(1))) {
+      // Only fold when the CSEL condition tests the same value that feeds
+      // the CTTZ; look through a TRUNCATE to find it.
+      SDValue X = CTTZ.getOpcode() == ISD::TRUNCATE
+                      ? CTTZ.getOperand(0).getOperand(0)
+                      : CTTZ.getOperand(0);
+      if (X == SUBS.getOperand(0)) {
+        // On AArch64, CTTZ lowers to RBIT + CLZ, and CLZ of 0 is the
+        // bitwidth, which ANDed with bitwidth - 1 gives 0, so the AND
+        // also covers the X == 0 arm of the CSEL.
+        unsigned BitWidth = CTTZ.getValueSizeInBits();
+        SDValue BitWidthMinusOne =
+            DAG.getConstant(BitWidth - 1, SDLoc(N), CTTZ.getValueType());
+        return DAG.getNode(ISD::AND, SDLoc(N), CTTZ.getValueType(), CTTZ,
+                           BitWidthMinusOne);
+      }
     }
   }
Index: llvm/test/CodeGen/AArch64/table-based-cttz.ll
===================================================================
--- llvm/test/CodeGen/AArch64/table-based-cttz.ll
+++ llvm/test/CodeGen/AArch64/table-based-cttz.ll
@@ -29,6 +29,30 @@
   ret i64 %2
 }
 
+define i32 @cttzi32ne(i32 %x) {
+; CHECK: rbit w8, w0
+; CHECK-NEXT: clz w8, w8
+; CHECK-NEXT: and w0, w8, #0x1f
+; CHECK-NEXT: ret
+entry:
+  %0 = call i32 @llvm.cttz.i32(i32 %x, i1 true)
+  %1 = icmp ne i32 %x, 0
+  %2 = select i1 %1, i32 %0, i32 0
+  ret i32 %2
+}
+
+define i64 @cttzi64ne(i64 %x) {
+; CHECK: rbit x8, x0
+; CHECK-NEXT: clz x8, x8
+; CHECK-NEXT: and x0, x8, #0x3f
+; CHECK-NEXT: ret
+entry:
+  %0 = call i64 @llvm.cttz.i64(i64 %x, i1 true)
+  %1 = icmp ne i64 %x, 0
+  %2 = select i1 %1, i64 %0, i64 0
+  ret i64 %2
+}
+
 define i32 @cttztrunc(i64 %x) {
 ; CHECK: rbit x8, x0
 ; CHECK-NEXT: clz x8, x8
@@ -69,7 +93,7 @@
   ret i32 %2
 }
 
-define i32 @cttzopnot0(i32 %x) {
+define i32 @cttzlhsnot0(i32 %x) {
 ; CHECK: rbit w9, w0
 ; CHECK-NEXT: mov w8, #10
 ; CHECK-NEXT: clz w9, w9
@@ -95,6 +119,19 @@
   ret i32 %2
 }
 
+define i32 @cttzlhsnotx(i32 %x, i32 %y) {
+; CHECK: rbit w8, w0
+; CHECK-NEXT: cmp w1, #0
+; CHECK-NEXT: clz w8, w8
+; CHECK-NEXT: csel w0, wzr, w8, eq
+; CHECK-NEXT: ret
+entry:
+  %0 = call i32 @llvm.cttz.i32(i32 %x, i1 true)
+  %1 = icmp eq i32 %y, 0
+  %2 = select i1 %1, i32 0, i32 %0
+  ret i32 %2
+}
+
 declare i32 @llvm.cttz.i32(i32, i1)
 declare i64 @llvm.cttz.i64(i64, i1)