Index: llvm/lib/Target/RISCV/RISCVISelLowering.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -11419,8 +11419,106 @@
   return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
 }
 
+// Fold away the `select`/`icmp` pair that guards a count-zeroes operation
+// against a zero input on `Targets` that do not support `cttz(0)`/`ctlz(0)`,
+// when the guarded result for a zero input is itself zero:
+//
+//   select (seteq X, 0), 0, (cttz X)  -->  and (cttz X), BitWidth - 1
+//   select (setne X, 0), (cttz X), 0  -->  and (cttz X), BitWidth - 1
+//
+// and likewise for ctlz. The count may be wrapped in one TRUNCATE or
+// ZERO_EXTEND. A zero-defined CTTZ/CTLZ yields BitWidth for a zero input, and
+// BitWidth & (BitWidth - 1) is zero when BitWidth is a power of two, so the
+// mask reproduces the zero the select chose while leaving every other count
+// (which is below BitWidth) untouched.
+//
+// Returns the replacement value, or an empty SDValue if N does not match.
+static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
+  SDValue Cond = N->getOperand(0);
+
+  // Only a comparison against zero can be the zero-input guard.
+  if (Cond.getOpcode() != ISD::SETCC)
+    return SDValue();
+
+  if (!isNullConstant(Cond->getOperand(1)))
+    return SDValue();
+
+  // This represents either CTTZ or CTLZ instruction.
+  SDValue CTZ;
+
+  // The select operand that is chosen when the compared value is zero.
+  SDValue ValOnZero;
+
+  ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
+  if (CCVal == ISD::CondCode::SETEQ) {
+    CTZ = N->getOperand(2);
+    ValOnZero = N->getOperand(1);
+  } else if (CCVal == ISD::CondCode::SETNE) {
+    CTZ = N->getOperand(1);
+    ValOnZero = N->getOperand(2);
+  } else {
+    return SDValue();
+  }
+
+  // Look through a single truncate or zero-extend of the count.
+  if (CTZ.getOpcode() == ISD::TRUNCATE || CTZ.getOpcode() == ISD::ZERO_EXTEND)
+    CTZ = CTZ.getOperand(0);
+
+  if (CTZ.getOpcode() != ISD::CTTZ && CTZ.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
+      CTZ.getOpcode() != ISD::CTLZ && CTZ.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
+    return SDValue();
+
+  if (!isNullConstant(ValOnZero))
+    return SDValue();
+
+  // The guard must test the very value whose zeroes are being counted.
+  SDValue LHS = Cond->getOperand(0);
+  SDValue CTZArgument = CTZ->getOperand(0);
+
+  if (LHS != CTZArgument)
+    return SDValue();
+
+  // Masking with BitWidth - 1 only maps BitWidth to zero when BitWidth is a
+  // power of two; bail out before creating any node otherwise.
+  unsigned BitWidth = CTZ.getValueSizeInBits();
+  if ((BitWidth & (BitWidth - 1)) != 0)
+    return SDValue();
+
+  // Switch to the variant that is defined for a zero input.
+  SDValue CTZZeroDef;
+  switch (CTZ.getOpcode()) {
+  case ISD::CTTZ:
+  case ISD::CTLZ:
+    CTZZeroDef = CTZ;
+    break;
+  case ISD::CTTZ_ZERO_UNDEF:
+    CTZZeroDef =
+        DAG.getNode(ISD::CTTZ, SDLoc(CTZ), CTZ.getValueType(), CTZArgument);
+    break;
+  case ISD::CTLZ_ZERO_UNDEF:
+    CTZZeroDef =
+        DAG.getNode(ISD::CTLZ, SDLoc(CTZ), CTZ.getValueType(), CTZArgument);
+    break;
+  default:
+    llvm_unreachable("Unhandled node");
+  }
+
+  SDLoc DL(N);
+  SDValue BitWidthMinusOne =
+      DAG.getConstant(BitWidth - 1, DL, CTZZeroDef.getValueType());
+
+  SDValue AndNode = DAG.getNode(ISD::AND, DL, CTZZeroDef.getValueType(),
+                                CTZZeroDef, BitWidthMinusOne);
+
+  // getZExtOrTrunc hands AndNode back unchanged when the types already match.
+  return DAG.getZExtOrTrunc(AndNode, DL, N->getValueType(0));
+}
+
 static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
                                     const RISCVSubtarget &Subtarget) {
+  if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
+    return Folded;
+
   if (Subtarget.hasShortForwardBranchOpt())
     return SDValue();
 
Index: llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll
@@ -0,0 +1,156 @@
+; RUN: llc -mtriple=riscv64 -mattr=+zbb -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64ZBB
+
+@global_x = global i32 0, align 4
+
+define signext i32 @ctz_dereferencing_pointer(i64* nocapture noundef readonly %b) {
+; RV64ZBB-LABEL: ctz_dereferencing_pointer:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    ld a0, 0(a0)
+; RV64ZBB-NEXT:    ctz a0, a0
+; RV64ZBB-NEXT:    andi a0, a0, 63
+; RV64ZBB-NEXT:    ret
+entry:
+  %0 = load i64, i64* %b, align 8
+  %1 = tail call i64 @llvm.cttz.i64(i64 %0, i1 true)
+  %2 = icmp eq i64 %0, 0
+  %3 = trunc i64 %1 to i32
+  %4 = select i1 %2, i32 0, i32 %3
+  ret i32 %4
+}
+
+define i64 @ctz_dereferencing_pointer_zext(i32* nocapture noundef readonly %b) {
+; RV64ZBB-LABEL: ctz_dereferencing_pointer_zext:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    lw a0, 0(a0)
+; RV64ZBB-NEXT:    ctzw a0, a0
+; RV64ZBB-NEXT:    andi a0, a0, 31
+; RV64ZBB-NEXT:    ret
+entry:
+  %0 = load i32, i32* %b, align 8
+  %1 = tail call i32 @llvm.cttz.i32(i32 %0, i1 true)
+  %2 = icmp eq i32 %0, 0
+  %3 = zext i32 %1 to i64
+  %4 = select i1 %2, i64 0, i64 %3
+  ret i64 %4
+}
+
+define signext i32 @ctz1(i32 noundef signext %x) {
+; RV64ZBB-LABEL: ctz1:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    ctzw a0, a0
+; RV64ZBB-NEXT:    andi a0, a0, 31
+; RV64ZBB-NEXT:    ret
+entry:
+  %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
+  %1 = icmp eq i32 %x, 0
+  %conv = select i1 %1, i32 0, i32 %0
+  ret i32 %conv
+}
+
+define signext i32 @ctz1_flipped(i32 noundef signext %x) {
+; RV64ZBB-LABEL: ctz1_flipped:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    ctzw a0, a0
+; RV64ZBB-NEXT:    andi a0, a0, 31
+; RV64ZBB-NEXT:    ret
+entry:
+  %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
+  %1 = icmp ne i32 %x, 0
+  %conv = select i1 %1, i32 %0, i32 0
+  ret i32 %conv
+}
+
+define signext i32 @ctz2(i32 noundef signext %x) {
+; RV64ZBB-LABEL: ctz2:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    ctzw a0, a0
+; RV64ZBB-NEXT:    ret
+entry:
+  %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 false)
+  ret i32 %0
+}
+
+define signext i32 @ctz3(i32 noundef signext %x) {
+; RV64ZBB-LABEL: ctz3:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    ctzw a0, a0
+; RV64ZBB-NEXT:    ret
+entry:
+  %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 false)
+  ret i32 %0
+}
+
+define signext i32 @ctz4(i64 noundef %b) {
+; RV64ZBB-LABEL: ctz4:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    ctz a0, a0
+; RV64ZBB-NEXT:    andi a0, a0, 63
+; RV64ZBB-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.cttz.i64(i64 %b, i1 true)
+  %1 = icmp eq i64 %b, 0
+  %2 = trunc i64 %0 to i32
+  %3 = select i1 %1, i32 0, i32 %2
+  ret i32 %3
+}
+
+define signext i32 @ctlz(i64 noundef %b) {
+; RV64ZBB-LABEL: ctlz:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    clz a0, a0
+; RV64ZBB-NEXT:    andi a0, a0, 63
+; RV64ZBB-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.ctlz.i64(i64 %b, i1 true)
+  %1 = icmp eq i64 %b, 0
+  %2 = trunc i64 %0 to i32
+  %3 = select i1 %1, i32 0, i32 %2
+  ret i32 %3
+}
+
+define signext i32 @ctz5(i32 noundef signext %x) {
+; RV64ZBB-LABEL: ctz5:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    ctzw a0, a0
+; RV64ZBB-NEXT:    andi a0, a0, 31
+; RV64ZBB-NEXT:    ret
+entry:
+  %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
+  %1 = icmp eq i32 %x, 0
+  %conv = select i1 %1, i32 0, i32 %0
+  ret i32 %conv
+}
+
+define signext i32 @ctz6(i32 noundef signext %x) {
+; RV64ZBB-LABEL: ctz6:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    ctzw a0, a0
+; RV64ZBB-NEXT:    andi a0, a0, 31
+; RV64ZBB-NEXT:    ret
+entry:
+  %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
+  %1 = icmp eq i32 %x, 0
+  %conv = select i1 %1, i32 0, i32 %0
+  ret i32 %conv
+}
+
+define signext i32 @globalVar() {
+; RV64ZBB-LABEL: globalVar:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    lui a0, %hi(global_x)
+; RV64ZBB-NEXT:    lw a0, %lo(global_x)(a0)
+; RV64ZBB-NEXT:    ctzw a0, a0
+; RV64ZBB-NEXT:    andi a0, a0, 31
+; RV64ZBB-NEXT:    ret
+entry:
+  %0 = load i32, i32* @global_x, align 4
+  %1 = tail call i32 @llvm.cttz.i32(i32 %0, i1 true)
+  %2 = icmp eq i32 %0, 0
+  %conv = select i1 %2, i32 0, i32 %1
+  ret i32 %conv
+}
+
+declare i64 @llvm.cttz.i64(i64, i1 immarg)
+declare i32 @llvm.cttz.i32(i32, i1 immarg)
+declare i64 @llvm.ctlz.i64(i64, i1 immarg)
+