Index: llvm/lib/Target/RISCV/RISCVISelLowering.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -11419,8 +11419,81 @@
   return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
 }
 
+/// Fold a `select` that only exists to give `cttz` a defined result of 0 for
+/// a zero input, i.e. one of
+///   (select (seteq X, 0), 0, (cttz X))
+///   (select (setne X, 0), (cttz X), 0)
+/// optionally with a TRUNCATE or ZERO_EXTEND between the count and the select,
+/// into (and (cttz X), BitWidth - 1).
+///
+/// ISD::CTTZ yields BitWidth for a zero input, and BitWidth & (BitWidth - 1)
+/// is 0 when BitWidth is a power of two, so the mask reproduces the select's
+/// zero arm while leaving every other count (at most BitWidth - 1) unchanged.
+///
+/// \param N   The ISD::SELECT node being combined.
+/// \param DAG The DAG used to build the replacement nodes.
+/// \returns The replacement value, or an empty SDValue if \p N does not match.
+static SDValue foldSelectOfCTTZ(SDNode *N, SelectionDAG &DAG) {
+  SDValue Cond = N->getOperand(0);
+  if (Cond.getOpcode() != ISD::SETCC || !isNullConstant(Cond->getOperand(1)))
+    return SDValue();
+
+  // Work out which select arm holds the count and which holds the value used
+  // for a zero input.
+  SDValue CTTZ;
+  SDValue ValOnZero;
+  ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
+  if (CCVal == ISD::CondCode::SETEQ) {
+    CTTZ = N->getOperand(2);
+    ValOnZero = N->getOperand(1);
+  } else if (CCVal == ISD::CondCode::SETNE) {
+    CTTZ = N->getOperand(1);
+    ValOnZero = N->getOperand(2);
+  } else {
+    return SDValue();
+  }
+
+  if (!isNullConstant(ValOnZero))
+    return SDValue();
+
+  // Look through a width change applied to the count.
+  if (CTTZ.getOpcode() == ISD::TRUNCATE || CTTZ.getOpcode() == ISD::ZERO_EXTEND)
+    CTTZ = CTTZ.getOperand(0);
+
+  if (CTTZ.getOpcode() != ISD::CTTZ && CTTZ.getOpcode() != ISD::CTTZ_ZERO_UNDEF)
+    return SDValue();
+
+  // The value being counted must be the one compared against zero.
+  SDValue CTTZArgument = CTTZ->getOperand(0);
+  if (Cond->getOperand(0) != CTTZArgument)
+    return SDValue();
+
+  // Masking with BitWidth - 1 is only equivalent to the select when BitWidth
+  // is a power of two; for any other width it would also clear bits of
+  // counts produced for non-zero inputs.
+  unsigned BitWidth = CTTZ.getValueSizeInBits();
+  if ((BitWidth & (BitWidth - 1)) != 0)
+    return SDValue();
+
+  // The zero input now reaches the count, so it must be the variant that is
+  // defined for zero.
+  EVT CTTZVT = CTTZ.getValueType();
+  if (CTTZ.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
+    CTTZ = DAG.getNode(ISD::CTTZ, SDLoc(CTTZ), CTTZVT, CTTZArgument);
+
+  SDLoc DL(N);
+  SDValue BitWidthMinusOne = DAG.getConstant(BitWidth - 1, DL, CTTZVT);
+  SDValue AndNode = DAG.getNode(ISD::AND, DL, CTTZVT, CTTZ, BitWidthMinusOne);
+
+  // getZExtOrTrunc returns AndNode itself when the types already match.
+  return DAG.getZExtOrTrunc(AndNode, DL, N->getValueType(0));
+}
+
 static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
                                     const RISCVSubtarget &Subtarget) {
+  if (SDValue Folded = foldSelectOfCTTZ(N, DAG))
+    return Folded;
+
   if (Subtarget.hasShortForwardBranchOpt())
     return SDValue();
 
Index: llvm/test/CodeGen/RISCV/cttz_zero_return_test.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/RISCV/cttz_zero_return_test.ll
@@ -0,0 +1,140 @@
+; RUN: llc -mtriple=riscv64 -mattr=+zbb -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64ZBB
+
+@global_x = global i32 0, align 4
+
+define signext i32 @ctz_dereferencing_pointer(i64* nocapture noundef readonly %b) {
+; RV64ZBB-LABEL: ctz_dereferencing_pointer:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: ld a0, 0(a0)
+; RV64ZBB-NEXT: ctz a0, a0
+; RV64ZBB-NEXT: andi a0, a0, 63
+; RV64ZBB-NEXT: ret
+entry:
+  %0 = load i64, i64* %b, align 8
+  %1 = tail call i64 @llvm.cttz.i64(i64 %0, i1 true)
+  %2 = icmp eq i64 %0, 0
+  %3 = trunc i64 %1 to i32
+  %4 = select i1 %2, i32 0, i32 %3
+  ret i32 %4
+}
+
+define i64 @ctz_dereferencing_pointer_zext(i32* nocapture noundef readonly %b) {
+; RV64ZBB-LABEL: ctz_dereferencing_pointer_zext:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: lw a0, 0(a0)
+; RV64ZBB-NEXT: ctzw a0, a0
+; RV64ZBB-NEXT: andi a0, a0, 31
+; RV64ZBB-NEXT: ret
+entry:
+  %0 = load i32, i32* %b, align 8
+  %1 = tail call i32 @llvm.cttz.i32(i32 %0, i1 true)
+  %2 = icmp eq i32 %0, 0
+  %3 = zext i32 %1 to i64
+  %4 = select i1 %2, i64 0, i64 %3
+  ret i64 %4
+}
+
+define signext i32 @ctz1(i32 noundef signext %x) {
+; RV64ZBB-LABEL: ctz1:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: ctzw a0, a0
+; RV64ZBB-NEXT: andi a0, a0, 31
+; RV64ZBB-NEXT: ret
+entry:
+  %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
+  %1 = icmp eq i32 %x, 0
+  %conv = select i1 %1, i32 0, i32 %0
+  ret i32 %conv
+}
+
+define signext i32 @ctz1_flipped(i32 noundef signext %x) {
+; RV64ZBB-LABEL: ctz1_flipped:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: ctzw a0, a0
+; RV64ZBB-NEXT: andi a0, a0, 31
+; RV64ZBB-NEXT: ret
+entry:
+  %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
+  %1 = icmp ne i32 %x, 0
+  %conv = select i1 %1, i32 %0, i32 0
+  ret i32 %conv
+}
+
+define signext i32 @ctz2(i32 noundef signext %x) {
+; RV64ZBB-LABEL: ctz2:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: ctzw a0, a0
+; RV64ZBB-NEXT: ret
+entry:
+  %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 false)
+  ret i32 %0
+}
+
+define signext i32 @ctz3(i32 noundef signext %x) {
+; RV64ZBB-LABEL: ctz3:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: ctzw a0, a0
+; RV64ZBB-NEXT: ret
+entry:
+  %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 false)
+  ret i32 %0
+}
+
+define signext i32 @ctz4(i64 noundef %b) {
+; RV64ZBB-LABEL: ctz4:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: ctz a0, a0
+; RV64ZBB-NEXT: andi a0, a0, 63
+; RV64ZBB-NEXT: ret
+entry:
+  %0 = tail call i64 @llvm.cttz.i64(i64 %b, i1 true)
+  %1 = icmp eq i64 %b, 0
+  %2 = trunc i64 %0 to i32
+  %3 = select i1 %1, i32 0, i32 %2
+  ret i32 %3
+}
+
+define signext i32 @ctz5(i32 noundef signext %x) {
+; RV64ZBB-LABEL: ctz5:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: ctzw a0, a0
+; RV64ZBB-NEXT: andi a0, a0, 31
+; RV64ZBB-NEXT: ret
+entry:
+  %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
+  %1 = icmp eq i32 %x, 0
+  %conv = select i1 %1, i32 0, i32 %0
+  ret i32 %conv
+}
+
+define signext i32 @ctz6(i32 noundef signext %x) {
+; RV64ZBB-LABEL: ctz6:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: ctzw a0, a0
+; RV64ZBB-NEXT: andi a0, a0, 31
+; RV64ZBB-NEXT: ret
+entry:
+  %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
+  %1 = icmp eq i32 %x, 0
+  %conv = select i1 %1, i32 0, i32 %0
+  ret i32 %conv
+}
+
+define signext i32 @globalVar() {
+; RV64ZBB-LABEL: globalVar:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: lui a0, %hi(global_x)
+; RV64ZBB-NEXT: lw a0, %lo(global_x)(a0)
+; RV64ZBB-NEXT: ctzw a0, a0
+; RV64ZBB-NEXT: andi a0, a0, 31
+; RV64ZBB-NEXT: ret
+entry:
+  %0 = load i32, i32* @global_x, align 4
+  %1 = tail call i32 @llvm.cttz.i32(i32 %0, i1 true)
+  %2 = icmp eq i32 %0, 0
+  %conv = select i1 %2, i32 0, i32 %1
+  ret i32 %conv
+}
+
+declare i64 @llvm.cttz.i64(i64, i1 immarg)
+declare i32 @llvm.cttz.i32(i32, i1 immarg)