diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -651,6 +651,11 @@ return false; } + /// Return true if ctpop is fast for \p VT (by default, if it is legal). + virtual bool isCtpopFast(EVT VT) const { + return isOperationLegal(ISD::CTPOP, VT); + } + /// Return the maximum number of "x & (x - 1)" operations that can be done /// instead of deferring to a custom CTPOP. virtual unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const { diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -4088,8 +4088,8 @@ // (ctpop x) u< 2 -> (x & x-1) == 0 // (ctpop x) u> 1 -> (x & x-1) != 0 if (Cond == ISD::SETULT || Cond == ISD::SETUGT) { - // Keep the CTPOP if it is a legal vector op. - if (CTVT.isVector() && TLI.isOperationLegal(ISD::CTPOP, CTVT)) + // Keep the CTPOP if it is a cheap vector op. + if (CTVT.isVector() && TLI.isCtpopFast(CTVT)) return SDValue(); unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond); @@ -4114,8 +4114,8 @@ // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0) // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0) if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) { - // Keep the CTPOP if it is legal. - if (TLI.isOperationLegal(ISD::CTPOP, CTVT)) + // Keep the CTPOP if it is cheap. 
+ if (TLI.isCtpopFast(CTVT)) return SDValue(); SDValue Zero = DAG.getConstant(0, dl, CTVT); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -592,6 +592,10 @@ } bool convertSelectOfConstantsToMath(EVT VT) const override { return true; } + bool isCtpopFast(EVT VT) const override; + + unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override; + bool preferZeroCompareBranch() const override { return true; } bool shouldInsertFencesForAtomic(const Instruction *I) const override { diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -17497,6 +17497,17 @@ return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY); } +bool RISCVTargetLowering::isCtpopFast(EVT VT) const { + if (VT.isVector()) + return isTypeLegal(VT) && Subtarget.hasStdExtZvbb(); + return Subtarget.hasStdExtZbb() && (VT == MVT::i32 || VT == MVT::i64); +} + +unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT, + ISD::CondCode Cond) const { + return isCtpopFast(VT) ? 
0 : 1; +} + namespace llvm::RISCVVIntrinsicsTable { #define GET_RISCVVIntrinsicsTable_IMPL diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll --- a/llvm/test/CodeGen/RISCV/rv32zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll @@ -309,6 +309,85 @@ ret i32 %1 } +define i1 @ctpop_i32_ult_two(i32 signext %a) nounwind { +; RV32I-LABEL: ctpop_i32_ult_two: +; RV32I: # %bb.0: +; RV32I-NEXT: addi a1, a0, -1 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: seqz a0, a0 +; RV32I-NEXT: ret +; +; RV32ZBB-LABEL: ctpop_i32_ult_two: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: cpop a0, a0 +; RV32ZBB-NEXT: sltiu a0, a0, 2 +; RV32ZBB-NEXT: ret + %1 = call i32 @llvm.ctpop.i32(i32 %a) + %2 = icmp ult i32 %1, 2 + ret i1 %2 +} + +define i1 @ctpop_i32_ugt_one(i32 signext %a) nounwind { +; RV32I-LABEL: ctpop_i32_ugt_one: +; RV32I: # %bb.0: +; RV32I-NEXT: addi a1, a0, -1 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: snez a0, a0 +; RV32I-NEXT: ret +; +; RV32ZBB-LABEL: ctpop_i32_ugt_one: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: cpop a0, a0 +; RV32ZBB-NEXT: sltiu a0, a0, 2 +; RV32ZBB-NEXT: xori a0, a0, 1 +; RV32ZBB-NEXT: ret + %1 = call i32 @llvm.ctpop.i32(i32 %a) + %2 = icmp ugt i32 %1, 1 + ret i1 %2 +} + +define i1 @ctpop_i32_eq_one(i32 signext %a) nounwind { +; RV32I-LABEL: ctpop_i32_eq_one: +; RV32I: # %bb.0: +; RV32I-NEXT: addi a1, a0, -1 +; RV32I-NEXT: and a1, a0, a1 +; RV32I-NEXT: seqz a1, a1 +; RV32I-NEXT: snez a0, a0 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: ret +; +; RV32ZBB-LABEL: ctpop_i32_eq_one: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: cpop a0, a0 +; RV32ZBB-NEXT: addi a0, a0, -1 +; RV32ZBB-NEXT: seqz a0, a0 +; RV32ZBB-NEXT: ret + %1 = call i32 @llvm.ctpop.i32(i32 %a) + %2 = icmp eq i32 %1, 1 + ret i1 %2 +} + +define i1 @ctpop_i32_ne_one(i32 signext %a) nounwind { +; RV32I-LABEL: ctpop_i32_ne_one: +; RV32I: # %bb.0: +; RV32I-NEXT: addi a1, a0, -1 +; RV32I-NEXT: and a1, a0, a1 +; RV32I-NEXT: snez a1, a1 +; RV32I-NEXT: seqz a0, a0 +; RV32I-NEXT: or a0, a0, a1 +; 
RV32I-NEXT: ret +; +; RV32ZBB-LABEL: ctpop_i32_ne_one: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: cpop a0, a0 +; RV32ZBB-NEXT: addi a0, a0, -1 +; RV32ZBB-NEXT: snez a0, a0 +; RV32ZBB-NEXT: ret + %1 = call i32 @llvm.ctpop.i32(i32 %a) + %2 = icmp ne i32 %1, 1 + ret i1 %2 +} + declare i64 @llvm.ctpop.i64(i64) define i64 @ctpop_i64(i64 %a) nounwind { @@ -380,6 +459,111 @@ ret i64 %1 } +define i1 @ctpop_i64_ult_two(i64 %a) nounwind { +; RV32I-LABEL: ctpop_i64_ult_two: +; RV32I: # %bb.0: +; RV32I-NEXT: addi a2, a0, -1 +; RV32I-NEXT: and a2, a0, a2 +; RV32I-NEXT: seqz a0, a0 +; RV32I-NEXT: sub a0, a1, a0 +; RV32I-NEXT: and a0, a1, a0 +; RV32I-NEXT: or a0, a2, a0 +; RV32I-NEXT: seqz a0, a0 +; RV32I-NEXT: ret +; +; RV32ZBB-LABEL: ctpop_i64_ult_two: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: cpop a1, a1 +; RV32ZBB-NEXT: cpop a0, a0 +; RV32ZBB-NEXT: add a0, a0, a1 +; RV32ZBB-NEXT: sltiu a0, a0, 2 +; RV32ZBB-NEXT: ret + %1 = call i64 @llvm.ctpop.i64(i64 %a) + %2 = icmp ult i64 %1, 2 + ret i1 %2 +} + +define i1 @ctpop_i64_ugt_one(i64 %a) nounwind { +; RV32I-LABEL: ctpop_i64_ugt_one: +; RV32I: # %bb.0: +; RV32I-NEXT: addi a2, a0, -1 +; RV32I-NEXT: and a2, a0, a2 +; RV32I-NEXT: seqz a0, a0 +; RV32I-NEXT: sub a0, a1, a0 +; RV32I-NEXT: and a0, a1, a0 +; RV32I-NEXT: or a0, a2, a0 +; RV32I-NEXT: snez a0, a0 +; RV32I-NEXT: ret +; +; RV32ZBB-LABEL: ctpop_i64_ugt_one: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: cpop a1, a1 +; RV32ZBB-NEXT: cpop a0, a0 +; RV32ZBB-NEXT: add a0, a0, a1 +; RV32ZBB-NEXT: sltiu a0, a0, 2 +; RV32ZBB-NEXT: xori a0, a0, 1 +; RV32ZBB-NEXT: ret + %1 = call i64 @llvm.ctpop.i64(i64 %a) + %2 = icmp ugt i64 %1, 1 + ret i1 %2 +} + +define i1 @ctpop_i64_eq_one(i64 %a) nounwind { +; RV32I-LABEL: ctpop_i64_eq_one: +; RV32I: # %bb.0: +; RV32I-NEXT: addi a2, a0, -1 +; RV32I-NEXT: and a2, a0, a2 +; RV32I-NEXT: seqz a3, a0 +; RV32I-NEXT: sub a3, a1, a3 +; RV32I-NEXT: and a3, a1, a3 +; RV32I-NEXT: or a2, a2, a3 +; RV32I-NEXT: seqz a2, a2 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: snez a0, a0 
+; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: ret +; +; RV32ZBB-LABEL: ctpop_i64_eq_one: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: cpop a1, a1 +; RV32ZBB-NEXT: cpop a0, a0 +; RV32ZBB-NEXT: add a0, a0, a1 +; RV32ZBB-NEXT: addi a0, a0, -1 +; RV32ZBB-NEXT: seqz a0, a0 +; RV32ZBB-NEXT: ret + %1 = call i64 @llvm.ctpop.i64(i64 %a) + %2 = icmp eq i64 %1, 1 + ret i1 %2 +} + +define i1 @ctpop_i64_ne_one(i64 %a) nounwind { +; RV32I-LABEL: ctpop_i64_ne_one: +; RV32I: # %bb.0: +; RV32I-NEXT: addi a2, a0, -1 +; RV32I-NEXT: and a2, a0, a2 +; RV32I-NEXT: seqz a3, a0 +; RV32I-NEXT: sub a3, a1, a3 +; RV32I-NEXT: and a3, a1, a3 +; RV32I-NEXT: or a2, a2, a3 +; RV32I-NEXT: snez a2, a2 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: seqz a0, a0 +; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: ret +; +; RV32ZBB-LABEL: ctpop_i64_ne_one: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: cpop a1, a1 +; RV32ZBB-NEXT: cpop a0, a0 +; RV32ZBB-NEXT: add a0, a0, a1 +; RV32ZBB-NEXT: addi a0, a0, -1 +; RV32ZBB-NEXT: snez a0, a0 +; RV32ZBB-NEXT: ret + %1 = call i64 @llvm.ctpop.i64(i64 %a) + %2 = icmp ne i64 %1, 1 + ret i1 %2 +} + define i32 @sextb_i32(i32 %a) nounwind { ; RV32I-LABEL: sextb_i32: ; RV32I: # %bb.0: @@ -451,10 +635,10 @@ define i32 @min_i32(i32 %a, i32 %b) nounwind { ; RV32I-LABEL: min_i32: ; RV32I: # %bb.0: -; RV32I-NEXT: blt a0, a1, .LBB10_2 +; RV32I-NEXT: blt a0, a1, .LBB18_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: mv a0, a1 -; RV32I-NEXT: .LBB10_2: +; RV32I-NEXT: .LBB18_2: ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: min_i32: @@ -474,18 +658,18 @@ define i64 @min_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: min_i64: ; CHECK: # %bb.0: -; CHECK-NEXT: beq a1, a3, .LBB11_2 +; CHECK-NEXT: beq a1, a3, .LBB19_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: slt a4, a1, a3 -; CHECK-NEXT: beqz a4, .LBB11_3 -; CHECK-NEXT: j .LBB11_4 -; CHECK-NEXT: .LBB11_2: +; CHECK-NEXT: beqz a4, .LBB19_3 +; CHECK-NEXT: j .LBB19_4 +; CHECK-NEXT: .LBB19_2: ; CHECK-NEXT: sltu a4, a0, a2 -; CHECK-NEXT: bnez a4, .LBB11_4 -; CHECK-NEXT: .LBB11_3: +; 
CHECK-NEXT: bnez a4, .LBB19_4 +; CHECK-NEXT: .LBB19_3: ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: mv a1, a3 -; CHECK-NEXT: .LBB11_4: +; CHECK-NEXT: .LBB19_4: ; CHECK-NEXT: ret %cmp = icmp slt i64 %a, %b %cond = select i1 %cmp, i64 %a, i64 %b @@ -495,10 +679,10 @@ define i32 @max_i32(i32 %a, i32 %b) nounwind { ; RV32I-LABEL: max_i32: ; RV32I: # %bb.0: -; RV32I-NEXT: blt a1, a0, .LBB12_2 +; RV32I-NEXT: blt a1, a0, .LBB20_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: mv a0, a1 -; RV32I-NEXT: .LBB12_2: +; RV32I-NEXT: .LBB20_2: ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: max_i32: @@ -518,18 +702,18 @@ define i64 @max_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: max_i64: ; CHECK: # %bb.0: -; CHECK-NEXT: beq a1, a3, .LBB13_2 +; CHECK-NEXT: beq a1, a3, .LBB21_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: slt a4, a3, a1 -; CHECK-NEXT: beqz a4, .LBB13_3 -; CHECK-NEXT: j .LBB13_4 -; CHECK-NEXT: .LBB13_2: +; CHECK-NEXT: beqz a4, .LBB21_3 +; CHECK-NEXT: j .LBB21_4 +; CHECK-NEXT: .LBB21_2: ; CHECK-NEXT: sltu a4, a2, a0 -; CHECK-NEXT: bnez a4, .LBB13_4 -; CHECK-NEXT: .LBB13_3: +; CHECK-NEXT: bnez a4, .LBB21_4 +; CHECK-NEXT: .LBB21_3: ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: mv a1, a3 -; CHECK-NEXT: .LBB13_4: +; CHECK-NEXT: .LBB21_4: ; CHECK-NEXT: ret %cmp = icmp sgt i64 %a, %b %cond = select i1 %cmp, i64 %a, i64 %b @@ -539,10 +723,10 @@ define i32 @minu_i32(i32 %a, i32 %b) nounwind { ; RV32I-LABEL: minu_i32: ; RV32I: # %bb.0: -; RV32I-NEXT: bltu a0, a1, .LBB14_2 +; RV32I-NEXT: bltu a0, a1, .LBB22_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: mv a0, a1 -; RV32I-NEXT: .LBB14_2: +; RV32I-NEXT: .LBB22_2: ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: minu_i32: @@ -562,18 +746,18 @@ define i64 @minu_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: minu_i64: ; CHECK: # %bb.0: -; CHECK-NEXT: beq a1, a3, .LBB15_2 +; CHECK-NEXT: beq a1, a3, .LBB23_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: sltu a4, a1, a3 -; CHECK-NEXT: beqz a4, .LBB15_3 -; CHECK-NEXT: j .LBB15_4 -; CHECK-NEXT: .LBB15_2: +; CHECK-NEXT: beqz a4, .LBB23_3 +; CHECK-NEXT: j 
.LBB23_4 +; CHECK-NEXT: .LBB23_2: ; CHECK-NEXT: sltu a4, a0, a2 -; CHECK-NEXT: bnez a4, .LBB15_4 -; CHECK-NEXT: .LBB15_3: +; CHECK-NEXT: bnez a4, .LBB23_4 +; CHECK-NEXT: .LBB23_3: ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: mv a1, a3 -; CHECK-NEXT: .LBB15_4: +; CHECK-NEXT: .LBB23_4: ; CHECK-NEXT: ret %cmp = icmp ult i64 %a, %b %cond = select i1 %cmp, i64 %a, i64 %b @@ -583,10 +767,10 @@ define i32 @maxu_i32(i32 %a, i32 %b) nounwind { ; RV32I-LABEL: maxu_i32: ; RV32I: # %bb.0: -; RV32I-NEXT: bltu a1, a0, .LBB16_2 +; RV32I-NEXT: bltu a1, a0, .LBB24_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: mv a0, a1 -; RV32I-NEXT: .LBB16_2: +; RV32I-NEXT: .LBB24_2: ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: maxu_i32: @@ -606,18 +790,18 @@ define i64 @maxu_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: maxu_i64: ; CHECK: # %bb.0: -; CHECK-NEXT: beq a1, a3, .LBB17_2 +; CHECK-NEXT: beq a1, a3, .LBB25_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: sltu a4, a3, a1 -; CHECK-NEXT: beqz a4, .LBB17_3 -; CHECK-NEXT: j .LBB17_4 -; CHECK-NEXT: .LBB17_2: +; CHECK-NEXT: beqz a4, .LBB25_3 +; CHECK-NEXT: j .LBB25_4 +; CHECK-NEXT: .LBB25_2: ; CHECK-NEXT: sltu a4, a2, a0 -; CHECK-NEXT: bnez a4, .LBB17_4 -; CHECK-NEXT: .LBB17_3: +; CHECK-NEXT: bnez a4, .LBB25_4 +; CHECK-NEXT: .LBB25_3: ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: mv a1, a3 -; CHECK-NEXT: .LBB17_4: +; CHECK-NEXT: .LBB25_4: ; CHECK-NEXT: ret %cmp = icmp ugt i64 %a, %b %cond = select i1 %cmp, i64 %a, i64 %b @@ -648,13 +832,13 @@ define i64 @abs_i64(i64 %x) { ; CHECK-LABEL: abs_i64: ; CHECK: # %bb.0: -; CHECK-NEXT: bgez a1, .LBB19_2 +; CHECK-NEXT: bgez a1, .LBB27_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: snez a2, a0 ; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: neg a1, a1 ; CHECK-NEXT: sub a1, a1, a2 -; CHECK-NEXT: .LBB19_2: +; CHECK-NEXT: .LBB27_2: ; CHECK-NEXT: ret %abs = tail call i64 @llvm.abs.i64(i64 %x, i1 true) ret i64 %abs diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll --- a/llvm/test/CodeGen/RISCV/rv64zbb.ll +++ 
b/llvm/test/CodeGen/RISCV/rv64zbb.ll @@ -578,6 +578,85 @@ ret i32 %1 } +define i1 @ctpop_i32_ult_two(i32 signext %a) nounwind { +; RV64I-LABEL: ctpop_i32_ult_two: +; RV64I: # %bb.0: +; RV64I-NEXT: addiw a1, a0, -1 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: seqz a0, a0 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: ctpop_i32_ult_two: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: cpopw a0, a0 +; RV64ZBB-NEXT: sltiu a0, a0, 2 +; RV64ZBB-NEXT: ret + %1 = call i32 @llvm.ctpop.i32(i32 %a) + %2 = icmp ult i32 %1, 2 + ret i1 %2 +} + +define i1 @ctpop_i32_ugt_one(i32 signext %a) nounwind { +; RV64I-LABEL: ctpop_i32_ugt_one: +; RV64I: # %bb.0: +; RV64I-NEXT: addiw a1, a0, -1 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: snez a0, a0 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: ctpop_i32_ugt_one: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: cpopw a0, a0 +; RV64ZBB-NEXT: sltiu a0, a0, 2 +; RV64ZBB-NEXT: xori a0, a0, 1 +; RV64ZBB-NEXT: ret + %1 = call i32 @llvm.ctpop.i32(i32 %a) + %2 = icmp ugt i32 %1, 1 + ret i1 %2 +} + +define i1 @ctpop_i32_eq_one(i32 signext %a) nounwind { +; RV64I-LABEL: ctpop_i32_eq_one: +; RV64I: # %bb.0: +; RV64I-NEXT: addiw a1, a0, -1 +; RV64I-NEXT: and a1, a0, a1 +; RV64I-NEXT: seqz a1, a1 +; RV64I-NEXT: snez a0, a0 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: ctpop_i32_eq_one: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: cpopw a0, a0 +; RV64ZBB-NEXT: addi a0, a0, -1 +; RV64ZBB-NEXT: seqz a0, a0 +; RV64ZBB-NEXT: ret + %1 = call i32 @llvm.ctpop.i32(i32 %a) + %2 = icmp eq i32 %1, 1 + ret i1 %2 +} + +define i1 @ctpop_i32_ne_one(i32 signext %a) nounwind { +; RV64I-LABEL: ctpop_i32_ne_one: +; RV64I: # %bb.0: +; RV64I-NEXT: addiw a1, a0, -1 +; RV64I-NEXT: and a1, a0, a1 +; RV64I-NEXT: snez a1, a1 +; RV64I-NEXT: seqz a0, a0 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: ctpop_i32_ne_one: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: cpopw a0, a0 +; RV64ZBB-NEXT: addi a0, a0, -1 +; RV64ZBB-NEXT: snez a0, a0 +; RV64ZBB-NEXT: ret + %1 = call i32 
@llvm.ctpop.i32(i32 %a) + %2 = icmp ne i32 %1, 1 + ret i1 %2 +} + define signext i32 @ctpop_i32_load(ptr %p) nounwind { ; RV64I-LABEL: ctpop_i32_load: ; RV64I: # %bb.0: @@ -665,6 +744,85 @@ ret i64 %1 } +define i1 @ctpop_i64_ult_two(i64 %a) nounwind { +; RV64I-LABEL: ctpop_i64_ult_two: +; RV64I: # %bb.0: +; RV64I-NEXT: addi a1, a0, -1 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: seqz a0, a0 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: ctpop_i64_ult_two: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: cpop a0, a0 +; RV64ZBB-NEXT: sltiu a0, a0, 2 +; RV64ZBB-NEXT: ret + %1 = call i64 @llvm.ctpop.i64(i64 %a) + %2 = icmp ult i64 %1, 2 + ret i1 %2 +} + +define i1 @ctpop_i64_ugt_one(i64 %a) nounwind { +; RV64I-LABEL: ctpop_i64_ugt_one: +; RV64I: # %bb.0: +; RV64I-NEXT: addi a1, a0, -1 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: snez a0, a0 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: ctpop_i64_ugt_one: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: cpop a0, a0 +; RV64ZBB-NEXT: sltiu a0, a0, 2 +; RV64ZBB-NEXT: xori a0, a0, 1 +; RV64ZBB-NEXT: ret + %1 = call i64 @llvm.ctpop.i64(i64 %a) + %2 = icmp ugt i64 %1, 1 + ret i1 %2 +} + +define i1 @ctpop_i64_eq_one(i64 %a) nounwind { +; RV64I-LABEL: ctpop_i64_eq_one: +; RV64I: # %bb.0: +; RV64I-NEXT: addi a1, a0, -1 +; RV64I-NEXT: and a1, a0, a1 +; RV64I-NEXT: seqz a1, a1 +; RV64I-NEXT: snez a0, a0 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: ctpop_i64_eq_one: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: cpop a0, a0 +; RV64ZBB-NEXT: addi a0, a0, -1 +; RV64ZBB-NEXT: seqz a0, a0 +; RV64ZBB-NEXT: ret + %1 = call i64 @llvm.ctpop.i64(i64 %a) + %2 = icmp eq i64 %1, 1 + ret i1 %2 +} + +define i1 @ctpop_i64_ne_one(i64 %a) nounwind { +; RV64I-LABEL: ctpop_i64_ne_one: +; RV64I: # %bb.0: +; RV64I-NEXT: addi a1, a0, -1 +; RV64I-NEXT: and a1, a0, a1 +; RV64I-NEXT: snez a1, a1 +; RV64I-NEXT: seqz a0, a0 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: ctpop_i64_ne_one: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: cpop a0, a0 +; RV64ZBB-NEXT: 
addi a0, a0, -1 +; RV64ZBB-NEXT: snez a0, a0 +; RV64ZBB-NEXT: ret + %1 = call i64 @llvm.ctpop.i64(i64 %a) + %2 = icmp ne i64 %1, 1 + ret i1 %2 +} + define signext i32 @sextb_i32(i32 signext %a) nounwind { ; RV64I-LABEL: sextb_i32: ; RV64I: # %bb.0: @@ -732,10 +890,10 @@ define signext i32 @min_i32(i32 signext %a, i32 signext %b) nounwind { ; RV64I-LABEL: min_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: blt a0, a1, .LBB18_2 +; RV64I-NEXT: blt a0, a1, .LBB26_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: .LBB18_2: +; RV64I-NEXT: .LBB26_2: ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: min_i32: @@ -750,10 +908,10 @@ define i64 @min_i64(i64 %a, i64 %b) nounwind { ; RV64I-LABEL: min_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: blt a0, a1, .LBB19_2 +; RV64I-NEXT: blt a0, a1, .LBB27_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: .LBB19_2: +; RV64I-NEXT: .LBB27_2: ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: min_i64: @@ -768,10 +926,10 @@ define signext i32 @max_i32(i32 signext %a, i32 signext %b) nounwind { ; RV64I-LABEL: max_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: blt a1, a0, .LBB20_2 +; RV64I-NEXT: blt a1, a0, .LBB28_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: .LBB20_2: +; RV64I-NEXT: .LBB28_2: ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: max_i32: @@ -786,10 +944,10 @@ define i64 @max_i64(i64 %a, i64 %b) nounwind { ; RV64I-LABEL: max_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: blt a1, a0, .LBB21_2 +; RV64I-NEXT: blt a1, a0, .LBB29_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: .LBB21_2: +; RV64I-NEXT: .LBB29_2: ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: max_i64: @@ -804,10 +962,10 @@ define signext i32 @minu_i32(i32 signext %a, i32 signext %b) nounwind { ; RV64I-LABEL: minu_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: bltu a0, a1, .LBB22_2 +; RV64I-NEXT: bltu a0, a1, .LBB30_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: .LBB22_2: +; RV64I-NEXT: .LBB30_2: ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: minu_i32: @@ -822,10 +980,10 @@ define i64 
@minu_i64(i64 %a, i64 %b) nounwind { ; RV64I-LABEL: minu_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: bltu a0, a1, .LBB23_2 +; RV64I-NEXT: bltu a0, a1, .LBB31_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: .LBB23_2: +; RV64I-NEXT: .LBB31_2: ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: minu_i64: @@ -840,10 +998,10 @@ define signext i32 @maxu_i32(i32 signext %a, i32 signext %b) nounwind { ; RV64I-LABEL: maxu_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: bltu a1, a0, .LBB24_2 +; RV64I-NEXT: bltu a1, a0, .LBB32_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: .LBB24_2: +; RV64I-NEXT: .LBB32_2: ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: maxu_i32: @@ -858,10 +1016,10 @@ define i64 @maxu_i64(i64 %a, i64 %b) nounwind { ; RV64I-LABEL: maxu_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: bltu a1, a0, .LBB25_2 +; RV64I-NEXT: bltu a1, a0, .LBB33_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: .LBB25_2: +; RV64I-NEXT: .LBB33_2: ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: maxu_i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll @@ -857,6 +857,91 @@ %a = call @llvm.ctpop.nxv16i32( %va) ret %a } + +define @ctpop_nxv16i32_ult_two( %va) { +; CHECK-LABEL: ctpop_nxv16i32_ult_two: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-NEXT: vadd.vi v16, v8, -1 +; CHECK-NEXT: vand.vv v8, v8, v16 +; CHECK-NEXT: vmseq.vi v0, v8, 0 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: ctpop_nxv16i32_ult_two: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8 +; CHECK-ZVBB-NEXT: vmsleu.vi v0, v8, 1 +; CHECK-ZVBB-NEXT: ret + %a = call @llvm.ctpop.nxv16i32( %va) + %cmp = icmp ult %a, shufflevector ( insertelement ( poison, i32 2, i64 0), poison, zeroinitializer) + ret %cmp +} + +define @ctpop_nxv16i32_ugt_one( %va) { +; CHECK-LABEL: ctpop_nxv16i32_ugt_one: +; CHECK: 
# %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-NEXT: vadd.vi v16, v8, -1 +; CHECK-NEXT: vand.vv v8, v8, v16 +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: ctpop_nxv16i32_ugt_one: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8 +; CHECK-ZVBB-NEXT: vmsgtu.vi v0, v8, 1 +; CHECK-ZVBB-NEXT: ret + %a = call @llvm.ctpop.nxv16i32( %va) + %cmp = icmp ugt %a, shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) + ret %cmp +} + +define @ctpop_nxv16i32_eq_one( %va) { +; CHECK-LABEL: ctpop_nxv16i32_eq_one: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-NEXT: vadd.vi v16, v8, -1 +; CHECK-NEXT: vand.vv v16, v8, v16 +; CHECK-NEXT: vmseq.vi v24, v16, 0 +; CHECK-NEXT: vmsne.vi v16, v8, 0 +; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: ctpop_nxv16i32_eq_one: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8 +; CHECK-ZVBB-NEXT: vmseq.vi v0, v8, 1 +; CHECK-ZVBB-NEXT: ret + %a = call @llvm.ctpop.nxv16i32( %va) + %cmp = icmp eq %a, shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) + ret %cmp +} + +define @ctpop_nxv16i32_ne_one( %va) { +; CHECK-LABEL: ctpop_nxv16i32_ne_one: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-NEXT: vadd.vi v16, v8, -1 +; CHECK-NEXT: vand.vv v16, v8, v16 +; CHECK-NEXT: vmsne.vi v24, v16, 0 +; CHECK-NEXT: vmseq.vi v16, v8, 0 +; CHECK-NEXT: vmor.mm v0, v16, v24 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: ctpop_nxv16i32_ne_one: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8 +; CHECK-ZVBB-NEXT: vmsne.vi v0, v8, 1 +; CHECK-ZVBB-NEXT: ret + %a = call @llvm.ctpop.nxv16i32( %va) + %cmp = icmp ne %a, shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) + ret 
%cmp +} + declare @llvm.ctpop.nxv16i32() define @ctpop_nxv1i64( %va) { @@ -1217,4 +1302,89 @@ %a = call @llvm.ctpop.nxv8i64( %va) ret %a } + +define @ctpop_nxv8i64_ult_two( %va) { +; CHECK-LABEL: ctpop_nxv8i64_ult_two: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vadd.vi v16, v8, -1 +; CHECK-NEXT: vand.vv v8, v8, v16 +; CHECK-NEXT: vmseq.vi v0, v8, 0 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: ctpop_nxv8i64_ult_two: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8 +; CHECK-ZVBB-NEXT: vmsleu.vi v0, v8, 1 +; CHECK-ZVBB-NEXT: ret + %a = call @llvm.ctpop.nxv8i64( %va) + %cmp = icmp ult %a, shufflevector ( insertelement ( poison, i64 2, i64 0), poison, zeroinitializer) + ret %cmp +} + +define @ctpop_nxv8i64_ugt_one( %va) { +; CHECK-LABEL: ctpop_nxv8i64_ugt_one: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vadd.vi v16, v8, -1 +; CHECK-NEXT: vand.vv v8, v8, v16 +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: ctpop_nxv8i64_ugt_one: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8 +; CHECK-ZVBB-NEXT: vmsgtu.vi v0, v8, 1 +; CHECK-ZVBB-NEXT: ret + %a = call @llvm.ctpop.nxv8i64( %va) + %cmp = icmp ugt %a, shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) + ret %cmp +} + +define @ctpop_nxv8i64_eq_one( %va) { +; CHECK-LABEL: ctpop_nxv8i64_eq_one: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vadd.vi v16, v8, -1 +; CHECK-NEXT: vand.vv v16, v8, v16 +; CHECK-NEXT: vmseq.vi v24, v16, 0 +; CHECK-NEXT: vmsne.vi v16, v8, 0 +; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: ctpop_nxv8i64_eq_one: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8 +; CHECK-ZVBB-NEXT: vmseq.vi v0, v8, 1 +; CHECK-ZVBB-NEXT: 
ret + %a = call @llvm.ctpop.nxv8i64( %va) + %cmp = icmp eq %a, shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) + ret %cmp +} + +define @ctpop_nxv8i64_ne_one( %va) { +; CHECK-LABEL: ctpop_nxv8i64_ne_one: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vadd.vi v16, v8, -1 +; CHECK-NEXT: vand.vv v16, v8, v16 +; CHECK-NEXT: vmsne.vi v24, v16, 0 +; CHECK-NEXT: vmseq.vi v16, v8, 0 +; CHECK-NEXT: vmor.mm v0, v16, v24 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: ctpop_nxv8i64_ne_one: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8 +; CHECK-ZVBB-NEXT: vmsne.vi v0, v8, 1 +; CHECK-ZVBB-NEXT: ret + %a = call @llvm.ctpop.nxv8i64( %va) + %cmp = icmp ne %a, shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) + ret %cmp +} + declare @llvm.ctpop.nxv8i64()