diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -74,6 +74,8 @@ bool isTruncateFree(EVT SrcVT, EVT DstVT) const override; bool isZExtFree(SDValue Val, EVT VT2) const override; bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override; + bool isCheapToSpeculateCttz() const override; + bool isCheapToSpeculateCtlz() const override; bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -365,6 +365,14 @@ return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64; } +bool RISCVTargetLowering::isCheapToSpeculateCttz() const { + return Subtarget.hasStdExtZbb(); +} + +bool RISCVTargetLowering::isCheapToSpeculateCtlz() const { + return Subtarget.hasStdExtZbb(); +} + bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const { if (VT == MVT::f32 && !Subtarget.hasStdExtF()) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td @@ -1055,8 +1055,13 @@ let Predicates = [HasStdExtZbb, IsRV64] in { def : Pat<(add (ctlz (and GPR:$rs1, (i64 0xFFFFFFFF))), (i64 -32)), (CLZW GPR:$rs1)>; -// We don't pattern-match CTZW here as it has the same pattern and result as -// RV64 CTZ +// computeKnownBits can't figure out that the and mask on the add result is +// unnecessary, so we need to pattern-match it away.
+def : Pat<(and (add (ctlz (and GPR:$rs1, (i64 0xFFFFFFFF))), (i64 -32)), + (i64 0xFFFFFFFF)), + (CLZW GPR:$rs1)>; +def : Pat<(cttz (or GPR:$rs1, (i64 0x100000000))), + (CTZW GPR:$rs1)>; def : Pat<(ctpop (and GPR:$rs1, (i64 0xFFFFFFFF))), (PCNTW GPR:$rs1)>; } // Predicates = [HasStdExtZbb, IsRV64] diff --git a/llvm/test/CodeGen/RISCV/rv32Zbb.ll b/llvm/test/CodeGen/RISCV/rv32Zbb.ll --- a/llvm/test/CodeGen/RISCV/rv32Zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv32Zbb.ll @@ -362,22 +362,12 @@ ; ; RV32IB-LABEL: ctlz_i32: ; RV32IB: # %bb.0: -; RV32IB-NEXT: beqz a0, .LBB8_2 -; RV32IB-NEXT: # %bb.1: # %cond.false ; RV32IB-NEXT: clz a0, a0 ; RV32IB-NEXT: ret -; RV32IB-NEXT: .LBB8_2: -; RV32IB-NEXT: addi a0, zero, 32 -; RV32IB-NEXT: ret ; ; RV32IBB-LABEL: ctlz_i32: ; RV32IBB: # %bb.0: -; RV32IBB-NEXT: beqz a0, .LBB8_2 -; RV32IBB-NEXT: # %bb.1: # %cond.false ; RV32IBB-NEXT: clz a0, a0 -; RV32IBB-NEXT: ret -; RV32IBB-NEXT: .LBB8_2: -; RV32IBB-NEXT: addi a0, zero, 32 ; RV32IBB-NEXT: ret %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false) ret i32 %1 @@ -545,22 +535,12 @@ ; ; RV32IB-LABEL: cttz_i32: ; RV32IB: # %bb.0: -; RV32IB-NEXT: beqz a0, .LBB10_2 -; RV32IB-NEXT: # %bb.1: # %cond.false ; RV32IB-NEXT: ctz a0, a0 ; RV32IB-NEXT: ret -; RV32IB-NEXT: .LBB10_2: -; RV32IB-NEXT: addi a0, zero, 32 -; RV32IB-NEXT: ret ; ; RV32IBB-LABEL: cttz_i32: ; RV32IBB: # %bb.0: -; RV32IBB-NEXT: beqz a0, .LBB10_2 -; RV32IBB-NEXT: # %bb.1: # %cond.false ; RV32IBB-NEXT: ctz a0, a0 -; RV32IBB-NEXT: ret -; RV32IBB-NEXT: .LBB10_2: -; RV32IBB-NEXT: addi a0, zero, 32 ; RV32IBB-NEXT: ret %1 = call i32 @llvm.cttz.i32(i32 %a, i1 false) ret i32 %1 diff --git a/llvm/test/CodeGen/RISCV/rv64Zbb.ll b/llvm/test/CodeGen/RISCV/rv64Zbb.ll --- a/llvm/test/CodeGen/RISCV/rv64Zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv64Zbb.ll @@ -290,22 +290,12 @@ ; ; RV64IB-LABEL: ctlz_i32: ; RV64IB: # %bb.0: -; RV64IB-NEXT: beqz a0, .LBB9_2 -; RV64IB-NEXT: # %bb.1: # %cond.false ; RV64IB-NEXT: clzw a0, a0 ; RV64IB-NEXT: ret -; RV64IB-NEXT: .LBB9_2: 
-; RV64IB-NEXT: addi a0, zero, 32 -; RV64IB-NEXT: ret ; ; RV64IBB-LABEL: ctlz_i32: ; RV64IBB: # %bb.0: -; RV64IBB-NEXT: beqz a0, .LBB9_2 -; RV64IBB-NEXT: # %bb.1: # %cond.false ; RV64IBB-NEXT: clzw a0, a0 -; RV64IBB-NEXT: ret -; RV64IBB-NEXT: .LBB9_2: -; RV64IBB-NEXT: addi a0, zero, 32 ; RV64IBB-NEXT: ret %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false) ret i32 %1 @@ -385,22 +375,12 @@ ; ; RV64IB-LABEL: ctlz_i64: ; RV64IB: # %bb.0: -; RV64IB-NEXT: beqz a0, .LBB10_2 -; RV64IB-NEXT: # %bb.1: # %cond.false ; RV64IB-NEXT: clz a0, a0 ; RV64IB-NEXT: ret -; RV64IB-NEXT: .LBB10_2: -; RV64IB-NEXT: addi a0, zero, 64 -; RV64IB-NEXT: ret ; ; RV64IBB-LABEL: ctlz_i64: ; RV64IBB: # %bb.0: -; RV64IBB-NEXT: beqz a0, .LBB10_2 -; RV64IBB-NEXT: # %bb.1: # %cond.false ; RV64IBB-NEXT: clz a0, a0 -; RV64IBB-NEXT: ret -; RV64IBB-NEXT: .LBB10_2: -; RV64IBB-NEXT: addi a0, zero, 64 ; RV64IBB-NEXT: ret %1 = call i64 @llvm.ctlz.i64(i64 %a, i1 false) ret i64 %1 @@ -470,22 +450,12 @@ ; ; RV64IB-LABEL: cttz_i32: ; RV64IB: # %bb.0: -; RV64IB-NEXT: beqz a0, .LBB11_2 -; RV64IB-NEXT: # %bb.1: # %cond.false -; RV64IB-NEXT: ctz a0, a0 -; RV64IB-NEXT: ret -; RV64IB-NEXT: .LBB11_2: -; RV64IB-NEXT: addi a0, zero, 32 +; RV64IB-NEXT: ctzw a0, a0 ; RV64IB-NEXT: ret ; ; RV64IBB-LABEL: cttz_i32: ; RV64IBB: # %bb.0: -; RV64IBB-NEXT: beqz a0, .LBB11_2 -; RV64IBB-NEXT: # %bb.1: # %cond.false -; RV64IBB-NEXT: ctz a0, a0 -; RV64IBB-NEXT: ret -; RV64IBB-NEXT: .LBB11_2: -; RV64IBB-NEXT: addi a0, zero, 32 +; RV64IBB-NEXT: ctzw a0, a0 ; RV64IBB-NEXT: ret %1 = call i32 @llvm.cttz.i32(i32 %a, i1 false) ret i32 %1 @@ -555,22 +525,12 @@ ; ; RV64IB-LABEL: cttz_i64: ; RV64IB: # %bb.0: -; RV64IB-NEXT: beqz a0, .LBB12_2 -; RV64IB-NEXT: # %bb.1: # %cond.false ; RV64IB-NEXT: ctz a0, a0 ; RV64IB-NEXT: ret -; RV64IB-NEXT: .LBB12_2: -; RV64IB-NEXT: addi a0, zero, 64 -; RV64IB-NEXT: ret ; ; RV64IBB-LABEL: cttz_i64: ; RV64IBB: # %bb.0: -; RV64IBB-NEXT: beqz a0, .LBB12_2 -; RV64IBB-NEXT: # %bb.1: # %cond.false ; RV64IBB-NEXT: ctz a0, a0 
-; RV64IBB-NEXT: ret -; RV64IBB-NEXT: .LBB12_2: -; RV64IBB-NEXT: addi a0, zero, 64 ; RV64IBB-NEXT: ret %1 = call i64 @llvm.cttz.i64(i64 %a, i1 false) ret i64 %1