Index: llvm/lib/Target/RISCV/RISCVISelLowering.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -11419,8 +11419,94 @@
   return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
 }
 
+// Try to remove the `select` + `icmp` pair that guards a count-trailing-zeros
+// against a zero input:
+//
+//   select (seteq X, 0), 0, (cttz X)  -->  and (cttz X), BitWidth - 1
+//   select (setne X, 0), (cttz X), 0  -->  and (cttz X), BitWidth - 1
+//
+// The count may be wrapped in a single TRUNCATE or ZERO_EXTEND. A zero-defined
+// CTTZ yields BitWidth for a zero input; for the power-of-two widths accepted
+// here, masking with BitWidth - 1 turns that into 0 and leaves every other
+// result (0 .. BitWidth - 1) untouched. Returns an empty SDValue when the
+// pattern does not match.
+static SDValue foldSelectOfCTTZ(SDNode *N, SelectionDAG &DAG) {
+  if (N->getNumOperands() != 3)
+    return SDValue();
+
+  SDValue Cond = N->getOperand(0);
+  if (Cond.getOpcode() != ISD::SETCC || !isNullConstant(Cond.getOperand(1)))
+    return SDValue();
+
+  // The polarity of the comparison decides which select arm is taken when X
+  // is zero; predicates other than eq/ne do not describe a zero guard.
+  SDValue ValOnZero;
+  SDValue CountZeroes;
+  ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+  if (CCVal == ISD::SETEQ) {
+    ValOnZero = N->getOperand(1);
+    CountZeroes = N->getOperand(2);
+  } else if (CCVal == ISD::SETNE) {
+    CountZeroes = N->getOperand(1);
+    ValOnZero = N->getOperand(2);
+  } else {
+    return SDValue();
+  }
+
+  // The arm selected for a zero input must be the constant 0.
+  if (!isNullConstant(ValOnZero))
+    return SDValue();
+
+  // Look through one truncate/zero-extend between the count and the select.
+  if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
+      CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
+    CountZeroes = CountZeroes.getOperand(0);
+
+  // RISCVISD::CTZW is deliberately not matched: the mask below is derived from
+  // the node's full value width, whereas `ctzw` counts within the low 32 bits
+  // only (see the RISC-V Zbb specification), so a width-derived mask would not
+  // map its zero-input result to 0.
+  if (CountZeroes.getOpcode() != ISD::CTTZ &&
+      CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF)
+    return SDValue();
+
+  // The guard must test exactly the value whose trailing zeros are counted.
+  SDValue CountZeroesArgument = CountZeroes.getOperand(0);
+  if (Cond.getOperand(0) != CountZeroesArgument)
+    return SDValue();
+
+  // Bail out instead of asserting: with assertions compiled out an unexpected
+  // width would otherwise be folded silently, and the masking trick is only
+  // valid for a power-of-two width.
+  EVT CountVT = CountZeroes.getValueType();
+  if (CountVT != MVT::i32 && CountVT != MVT::i64)
+    return SDValue();
+
+  // Once the select is gone a zero input reaches the count, so the
+  // zero-is-undefined flavour must be replaced by the zero-defined one.
+  if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
+    CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes), CountVT,
+                              CountZeroesArgument);
+
+  SDLoc DL(N);
+  unsigned BitWidth = CountZeroes.getValueSizeInBits();
+  SDValue BitWidthMinusOne = DAG.getConstant(BitWidth - 1, DL, CountVT);
+  SDValue AndNode =
+      DAG.getNode(ISD::AND, DL, CountVT, CountZeroes, BitWidthMinusOne);
+
+  // Re-apply the width change that was looked through above; this is a no-op
+  // when the types already agree.
+  return DAG.getZExtOrTrunc(AndNode, DL, N->getValueType(0));
+}
+
 static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
                                     const RISCVSubtarget &Subtarget) {
+  // Only attempt the fold with Zbb, the configuration in which the
+  // accompanying test expects CTTZ to lower to a single ctz/ctzw.
+  if (Subtarget.hasStdExtZbb())
+    if (SDValue Folded = foldSelectOfCTTZ(N, DAG))
+      return Folded;
+
   if (Subtarget.hasShortForwardBranchOpt())
     return SDValue();
 
Index: llvm/test/CodeGen/RISCV/cttz_zero_return_test.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/RISCV/cttz_zero_return_test.ll
@@ -0,0 +1,127 @@
+; RUN: llc -mtriple=riscv64 -mattr=+zbb -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64ZBB
+
+@global_x = dso_local local_unnamed_addr global i32 0, align 4
+
+define dso_local signext i32 @ctz_dereferencing_pointer(i64* nocapture noundef readonly %b) {
+; RV64ZBB-LABEL: ctz_dereferencing_pointer:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    ld a0, 0(a0)
+; RV64ZBB-NEXT:    ctz a0, a0
+; RV64ZBB-NEXT:    andi a0, a0, 63
+; RV64ZBB-NEXT:    ret
+entry:
+  %0 = load i64, i64* %b, align 8
+  %1 = tail call i64 @llvm.cttz.i64(i64 %0, i1 true)
+  %2 = icmp eq i64 %0, 0
+  %3 = trunc i64 %1 to i32
+  %4 = select i1 %2, i32 0, i32 %3
+  ret i32 %4
+}
+
+define dso_local signext i64 @ctz_dereferencing_pointer_zext(i32* nocapture noundef readonly %b) {
+; RV64ZBB-LABEL: ctz_dereferencing_pointer_zext:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    lw a0, 0(a0)
+; RV64ZBB-NEXT:    ctzw a0, a0
+; RV64ZBB-NEXT:    andi a0, a0, 31
+; RV64ZBB-NEXT:    ret
+entry:
+  %0 = load i32, i32* %b, align 8
+  %1 = tail call i32 @llvm.cttz.i32(i32 %0, i1 true)
+  %2 = icmp eq i32 %0, 0
+  %3 = zext i32 %1 to i64
+  %4 = select i1 %2, i64 0, i64 %3
+  ret i64 %4
+}
+
+define dso_local signext i32 @ctz1(i32 noundef signext %x) {
+; RV64ZBB-LABEL: ctz1:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    ctzw a0, a0
+; RV64ZBB-NEXT:    andi a0, a0, 31
+; RV64ZBB-NEXT:    ret
+entry:
+  %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
+  %1 = icmp eq i32 %x, 0
+  %conv = select i1 %1, i32 0, i32 %0
+  ret i32 %conv
+}
+
+define dso_local signext i32 @ctz2(i32 noundef signext %x) {
+; RV64ZBB-LABEL: ctz2:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    ctzw a0, a0
+; RV64ZBB-NEXT:    ret
+entry:
+  %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 false)
+  ret i32 %0
+}
+
+define dso_local signext i32 @ctz3(i32 noundef signext %x) {
+; RV64ZBB-LABEL: ctz3:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    ctzw a0, a0
+; RV64ZBB-NEXT:    ret
+entry:
+  %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 false)
+  ret i32 %0
+}
+
+define dso_local signext i32 @ctz4(i64 noundef %b) {
+; RV64ZBB-LABEL: ctz4:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    ctz a0, a0
+; RV64ZBB-NEXT:    andi a0, a0, 63
+; RV64ZBB-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.cttz.i64(i64 %b, i1 true)
+  %1 = icmp eq i64 %b, 0
+  %2 = trunc i64 %0 to i32
+  %3 = select i1 %1, i32 0, i32 %2
+  ret i32 %3
+}
+
+define dso_local signext i32 @ctz5(i32 noundef signext %x) {
+; RV64ZBB-LABEL: ctz5:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    ctzw a0, a0
+; RV64ZBB-NEXT:    andi a0, a0, 31
+; RV64ZBB-NEXT:    ret
+entry:
+  %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
+  %1 = icmp eq i32 %x, 0
+  %conv = select i1 %1, i32 0, i32 %0
+  ret i32 %conv
+}
+
+define dso_local signext i32 @ctz6(i32 noundef signext %x) {
+; RV64ZBB-LABEL: ctz6:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    ctzw a0, a0
+; RV64ZBB-NEXT:    andi a0, a0, 31
+; RV64ZBB-NEXT:    ret
+entry:
+  %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
+  %1 = icmp eq i32 %x, 0
+  %conv = select i1 %1, i32 0, i32 %0
+  ret i32 %conv
+}
+
+define dso_local signext i32 @globalVar() {
+; RV64ZBB-LABEL: globalVar:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    lui a0, %hi(global_x)
+; RV64ZBB-NEXT:    lw a0, %lo(global_x)(a0)
+; RV64ZBB-NEXT:    ctzw a0, a0
+; RV64ZBB-NEXT:    andi a0, a0, 31
+; RV64ZBB-NEXT:    ret
+entry:
+  %0 = load i32, i32* @global_x, align 4
+  %1 = tail call i32 @llvm.cttz.i32(i32 %0, i1 true)
+  %2 = icmp eq i32 %0, 0
+  %conv = select i1 %2, i32 0, i32 %1
+  ret i32 %conv
+}
+
+declare i64 @llvm.cttz.i64(i64, i1 immarg)
+declare i32 @llvm.cttz.i32(i32, i1 immarg)