Index: llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
===================================================================
--- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -45,6 +45,12 @@
 
   bool SelectAddrFI(SDValue Addr, SDValue &Base);
 
+  bool SelectSLOI(SDValue N, SDValue &RS1, SDValue &Shamt);
+  bool SelectSROI(SDValue N, SDValue &RS1, SDValue &Shamt);
+  bool SelectSLLIUW(SDValue N, SDValue &RS1, SDValue &Shamt);
+  bool SelectSLOIW(SDValue N, SDValue &RS1, SDValue &Shamt);
+  bool SelectSROIW(SDValue N, SDValue &RS1, SDValue &Shamt);
+
   // Include the pieces autogenerated from the target description.
 #include "RISCVGenDAGISel.inc"
Index: llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -160,6 +160,196 @@
   return false;
 }
 
+// Check that it is a SLOI (Shift Left Ones Immediate). We first check that
+// it is the right node tree:
+//
+//   (OR (SHL RS1, VC2), VC1)
+//
+// and then we check that VC1, the mask used to fill with ones, is compatible
+// with VC2, the shamt:
+//
+//   VC1 == maskTrailingOnes<uint64_t>(VC2)
+
+bool RISCVDAGToDAGISel::SelectSLOI(SDValue N, SDValue &RS1, SDValue &Shamt) {
+  MVT XLenVT = Subtarget->getXLenVT();
+  if (N.getOpcode() == ISD::OR) {
+    SDValue Or = N;
+    if (Or.getOperand(0).getOpcode() == ISD::SHL) {
+      SDValue Shl = Or.getOperand(0);
+      if (isa<ConstantSDNode>(Shl.getOperand(1)) &&
+          isa<ConstantSDNode>(Or.getOperand(1))) {
+        if (XLenVT == MVT::i64) {
+          uint64_t VC1 = Or.getConstantOperandVal(1);
+          uint64_t VC2 = Shl.getConstantOperandVal(1);
+          if (VC1 == maskTrailingOnes<uint64_t>(VC2)) {
+            RS1 = Shl.getOperand(0);
+            Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N),
+                                              Shl.getOperand(1).getValueType());
+            return true;
+          }
+        }
+        if (XLenVT == MVT::i32) {
+          uint32_t VC1 = Or.getConstantOperandVal(1);
+          uint32_t VC2 = Shl.getConstantOperandVal(1);
+          if (VC1 == maskTrailingOnes<uint32_t>(VC2)) {
+            RS1 = Shl.getOperand(0);
+            Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N),
+                                              Shl.getOperand(1).getValueType());
+            return true;
+          }
+        }
+      }
+    }
+  }
+  return false;
+}
+
+// Check that it is a SROI (Shift Right Ones Immediate). We first check that
+// it is the right node tree:
+//
+//   (OR (SRL RS1, VC2), VC1)
+//
+// and then we check that VC1, the mask used to fill with ones, is compatible
+// with VC2, the shamt:
+//
+//   VC1 == maskLeadingOnes<uint64_t>(VC2)
+
+bool RISCVDAGToDAGISel::SelectSROI(SDValue N, SDValue &RS1, SDValue &Shamt) {
+  MVT XLenVT = Subtarget->getXLenVT();
+  if (N.getOpcode() == ISD::OR) {
+    SDValue Or = N;
+    if (Or.getOperand(0).getOpcode() == ISD::SRL) {
+      SDValue Srl = Or.getOperand(0);
+      if (isa<ConstantSDNode>(Srl.getOperand(1)) &&
+          isa<ConstantSDNode>(Or.getOperand(1))) {
+        if (XLenVT == MVT::i64) {
+          uint64_t VC1 = Or.getConstantOperandVal(1);
+          uint64_t VC2 = Srl.getConstantOperandVal(1);
+          if (VC1 == maskLeadingOnes<uint64_t>(VC2)) {
+            RS1 = Srl.getOperand(0);
+            Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N),
+                                              Srl.getOperand(1).getValueType());
+            return true;
+          }
+        }
+        if (XLenVT == MVT::i32) {
+          uint32_t VC1 = Or.getConstantOperandVal(1);
+          uint32_t VC2 = Srl.getConstantOperandVal(1);
+          if (VC1 == maskLeadingOnes<uint32_t>(VC2)) {
+            RS1 = Srl.getOperand(0);
+            Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N),
+                                              Srl.getOperand(1).getValueType());
+            return true;
+          }
+        }
+      }
+    }
+  }
+  return false;
+}
+
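+// For example, with XLEN == 64, (or (shl X, 3), 7) satisfies the SLOI check,
+// since 7 == maskTrailingOnes<uint64_t>(3), and is selected as (SLOI X, 3);
+// likewise (or (srl X, 3), 0xE000000000000000) satisfies the SROI check,
+// since 0xE000000000000000 == maskLeadingOnes<uint64_t>(3), and is selected
+// as (SROI X, 3).
+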
+// Check that it is a SLLIUW (Shift Logical Left Immediate Unsigned i32
+// on RV64).
+// SLLIUW is the same as SLLI except for the fact that it clears the bits
+// XLEN-1:32 of the input RS1 before shifting.
+// We first check that it is the right node tree:
+//
+//   (AND (SHL RS1, VC2), VC1)
+//
+// We check that VC2, the shamt, is less than 32; otherwise the pattern is
+// exactly the same as SLLI and we give priority to that.
+// Finally, we check that VC1, the mask used to clear the upper 32 bits
+// of RS1, is correct:
+//
+//   VC1 == (0xFFFFFFFF << VC2)
+
+bool RISCVDAGToDAGISel::SelectSLLIUW(SDValue N, SDValue &RS1, SDValue &Shamt) {
+  if (N.getOpcode() == ISD::AND && Subtarget->getXLenVT() == MVT::i64) {
+    SDValue And = N;
+    if (And.getOperand(0).getOpcode() == ISD::SHL) {
+      SDValue Shl = And.getOperand(0);
+      if (isa<ConstantSDNode>(Shl.getOperand(1)) &&
+          isa<ConstantSDNode>(And.getOperand(1))) {
+        uint64_t VC1 = And.getConstantOperandVal(1);
+        uint64_t VC2 = Shl.getConstantOperandVal(1);
+        if (VC2 < 32 && VC1 == ((uint64_t)0xFFFFFFFF << VC2)) {
+          RS1 = Shl.getOperand(0);
+          Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N),
+                                            Shl.getOperand(1).getValueType());
+          return true;
+        }
+      }
+    }
+  }
+  return false;
+}
+
+// Check that it is a SLOIW (Shift Left Ones Immediate i32 on RV64).
+// We first check that it is the right node tree:
+//
+//   (SIGN_EXTEND_INREG (OR (SHL RS1, VC2), VC1))
+//
+// and then we check that VC1, the mask used to fill with ones, is compatible
+// with VC2, the shamt:
+//
+//   VC1 == maskTrailingOnes<uint32_t>(VC2)
+
+bool RISCVDAGToDAGISel::SelectSLOIW(SDValue N, SDValue &RS1, SDValue &Shamt) {
+  if (Subtarget->getXLenVT() == MVT::i64 &&
+      N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+      cast<VTSDNode>(N.getOperand(1))->getVT() == MVT::i32) {
+    if (N.getOperand(0).getOpcode() == ISD::OR) {
+      SDValue Or = N.getOperand(0);
+      if (Or.getOperand(0).getOpcode() == ISD::SHL) {
+        SDValue Shl = Or.getOperand(0);
+        if (isa<ConstantSDNode>(Shl.getOperand(1)) &&
+            isa<ConstantSDNode>(Or.getOperand(1))) {
+          uint32_t VC1 = Or.getConstantOperandVal(1);
+          uint32_t VC2 = Shl.getConstantOperandVal(1);
+          if (VC1 == maskTrailingOnes<uint32_t>(VC2)) {
+            RS1 = Shl.getOperand(0);
+            Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N),
+                                              Shl.getOperand(1).getValueType());
+            return true;
+          }
+        }
+      }
+    }
+  }
+  return false;
+}
+
+// Check that it is a SROIW (Shift Right Ones Immediate i32 on RV64).
+// We first check that it is the right node tree:
+//
+//   (OR (SRL RS1, VC2), VC1)
+//
+// and then we check that VC1, the mask used to fill with ones, is compatible
+// with VC2, the shamt:
+//
+//   VC1 == maskLeadingOnes<uint32_t>(VC2)
+
+bool RISCVDAGToDAGISel::SelectSROIW(SDValue N, SDValue &RS1, SDValue &Shamt) {
+  if (N.getOpcode() == ISD::OR && Subtarget->getXLenVT() == MVT::i64) {
+    SDValue Or = N;
+    if (Or.getOperand(0).getOpcode() == ISD::SRL) {
+      SDValue Srl = Or.getOperand(0);
+      if (isa<ConstantSDNode>(Srl.getOperand(1)) &&
+          isa<ConstantSDNode>(Or.getOperand(1))) {
+        uint32_t VC1 = Or.getConstantOperandVal(1);
+        uint32_t VC2 = Srl.getConstantOperandVal(1);
+        if (VC1 == maskLeadingOnes<uint32_t>(VC2)) {
+          RS1 = Srl.getOperand(0);
+          Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N),
+                                            Srl.getOperand(1).getValueType());
+          return true;
+        }
+      }
+    }
+  }
+  return false;
+}
+
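+// For example, (and (shl X, 4), 0xFFFFFFFF0) satisfies the SLLIUW check,
+// since 0xFFFFFFFF0 == (0xFFFFFFFF << 4), and is selected as (SLLIUW X, 4);
+// similarly, (or (srl X, 2), 0xC0000000) satisfies the SROIW check, since
+// 0xC0000000 == maskLeadingOnes<uint32_t>(2), and becomes (SROIW X, 2).
+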
 // Merge an ADDI into the offset of a load/store instruction where possible.
 // (load (addi base, off1), off2) -> (load base, off1+off2)
 // (store val, (addi base, off1), off2) -> (store val, base, off1+off2)
Index: llvm/lib/Target/RISCV/RISCVISelLowering.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -151,9 +151,12 @@
   setOperationAction(ISD::ROTL, XLenVT, Expand);
   setOperationAction(ISD::ROTR, XLenVT, Expand);
   setOperationAction(ISD::BSWAP, XLenVT, Expand);
-  setOperationAction(ISD::CTTZ, XLenVT, Expand);
-  setOperationAction(ISD::CTLZ, XLenVT, Expand);
-  setOperationAction(ISD::CTPOP, XLenVT, Expand);
+
+  if (!Subtarget.hasStdExtZbb()) {
+    setOperationAction(ISD::CTTZ, XLenVT, Expand);
+    setOperationAction(ISD::CTLZ, XLenVT, Expand);
+    setOperationAction(ISD::CTPOP, XLenVT, Expand);
+  }
 
   ISD::CondCode FPCCToExtend[] = {
       ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
Index: llvm/lib/Target/RISCV/RISCVInstrInfoB.td
===================================================================
--- llvm/lib/Target/RISCV/RISCVInstrInfoB.td
+++ llvm/lib/Target/RISCV/RISCVInstrInfoB.td
@@ -632,3 +632,79 @@
 def : CompressPat<(PACK GPRC:$rs1, GPRC:$rs1, X0),
                   (C_ZEXTW GPRC:$rs1)>;
 } // Predicates = [HasStdExtZbproposedc, HasStdExtC, IsRV64]
+
+//===----------------------------------------------------------------------===//
+// Codegen patterns
+//===----------------------------------------------------------------------===//
+def SLOIPat   : ComplexPattern<XLenVT, 2, "SelectSLOI">;
+def SROIPat   : ComplexPattern<XLenVT, 2, "SelectSROI">;
+def SLLIUWPat : ComplexPattern<i64, 2, "SelectSLLIUW">;
+def SLOIWPat  : ComplexPattern<i64, 2, "SelectSLOIW">;
+def SROIWPat  : ComplexPattern<i64, 2, "SelectSROIW">;
+
+let Predicates = [HasStdExtZbb] in {
+def : Pat<(xor (shl (xor GPR:$rs1, -1), GPR:$rs2), -1),
+          (SLO GPR:$rs1, GPR:$rs2)>;
+def : Pat<(xor (srl (xor GPR:$rs1, -1), GPR:$rs2), -1),
+          (SRO GPR:$rs1, GPR:$rs2)>;
+def : Pat<(SLOIPat GPR:$rs1, uimmlog2xlen:$shamt),
+          (SLOI GPR:$rs1, uimmlog2xlen:$shamt)>;
+def : Pat<(SROIPat GPR:$rs1, uimmlog2xlen:$shamt),
+          (SROI GPR:$rs1, uimmlog2xlen:$shamt)>;
+def : Pat<(ctlz GPR:$rs1), (CLZ GPR:$rs1)>;
+def : Pat<(cttz GPR:$rs1), (CTZ GPR:$rs1)>;
+def : Pat<(ctpop GPR:$rs1), (PCNT GPR:$rs1)>;
+} // Predicates = [HasStdExtZbb]
+
+let Predicates = [HasStdExtZbb, IsRV32] in
+def : Pat<(sra (shl GPR:$rs1, (i32 24)), (i32 24)), (SEXTB GPR:$rs1)>;
+let Predicates = [HasStdExtZbb, IsRV64] in
+def : Pat<(sra (shl GPR:$rs1, (i64 56)), (i64 56)), (SEXTB GPR:$rs1)>;
+
+let Predicates = [HasStdExtZbb, IsRV32] in
+def : Pat<(sra (shl GPR:$rs1, (i32 16)), (i32 16)), (SEXTH GPR:$rs1)>;
+let Predicates = [HasStdExtZbb, IsRV64] in
+def : Pat<(sra (shl GPR:$rs1, (i64 48)), (i64 48)), (SEXTH GPR:$rs1)>;
+
+// In the riscv_selectcc patterns below, the bare condition-code immediates
+// are ISD::CondCode encodings: 20 is ISD::SETLT and 12 is ISD::SETULT.
+let Predicates = [HasStdExtZbb] in {
+def : Pat<(smin GPR:$rs1, GPR:$rs2), (MIN GPR:$rs1, GPR:$rs2)>;
+def : Pat<(riscv_selectcc GPR:$rs1, GPR:$rs2, (XLenVT 20), GPR:$rs1, GPR:$rs2),
+          (MIN GPR:$rs1, GPR:$rs2)>;
+def : Pat<(smax GPR:$rs1, GPR:$rs2), (MAX GPR:$rs1, GPR:$rs2)>;
+def : Pat<(riscv_selectcc GPR:$rs2, GPR:$rs1, (XLenVT 20), GPR:$rs1, GPR:$rs2),
+          (MAX GPR:$rs1, GPR:$rs2)>;
+def : Pat<(umin GPR:$rs1, GPR:$rs2), (MINU GPR:$rs1, GPR:$rs2)>;
+def : Pat<(riscv_selectcc GPR:$rs1, GPR:$rs2, (XLenVT 12), GPR:$rs1, GPR:$rs2),
+          (MINU GPR:$rs1, GPR:$rs2)>;
+def : Pat<(umax GPR:$rs1, GPR:$rs2), (MAXU GPR:$rs1, GPR:$rs2)>;
+def : Pat<(riscv_selectcc GPR:$rs2, GPR:$rs1, (XLenVT 12), GPR:$rs1, GPR:$rs2),
+          (MAXU GPR:$rs1, GPR:$rs2)>;
+} // Predicates = [HasStdExtZbb]
+
+let Predicates = [HasStdExtZbb, IsRV64] in {
+def : Pat<(and (add GPR:$rs, simm12:$simm12), (i64 0xFFFFFFFF)),
+          (ADDIWU GPR:$rs, simm12:$simm12)>;
+def : Pat<(SLLIUWPat GPR:$rs1, uimmlog2xlen:$shamt),
+          (SLLIUW GPR:$rs1, uimmlog2xlen:$shamt)>;
+def : Pat<(and (add GPR:$rs1, GPR:$rs2), (i64 0xFFFFFFFF)),
+          (ADDWU GPR:$rs1, GPR:$rs2)>;
+def : Pat<(and (sub GPR:$rs1, GPR:$rs2), (i64 0xFFFFFFFF)),
+          (SUBWU GPR:$rs1, GPR:$rs2)>;
+def : Pat<(add GPR:$rs1, (and GPR:$rs2, (i64 0xFFFFFFFF))),
+          (ADDUW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(sub GPR:$rs1, (and GPR:$rs2, (i64 0xFFFFFFFF))),
+          (SUBUW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(xor (riscv_sllw (xor GPR:$rs1, -1), GPR:$rs2), -1),
+          (SLOW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(xor (riscv_srlw (xor GPR:$rs1, -1), GPR:$rs2), -1),
+          (SROW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(SLOIWPat GPR:$rs1, uimmlog2xlen:$shamt),
+          (SLOIW GPR:$rs1, uimmlog2xlen:$shamt)>;
+def : Pat<(SROIWPat GPR:$rs1, uimmlog2xlen:$shamt),
+          (SROIW GPR:$rs1, uimmlog2xlen:$shamt)>;
+def : Pat<(add (ctlz (and GPR:$rs1, (i64 0xFFFFFFFF))), (i64 -32)),
+          (CLZW GPR:$rs1)>;
+// We don't pattern-match CTZW here as it has the same pattern and result as
+// RV64 CTZ.
+def : Pat<(ctpop (and GPR:$rs1, (i64 0xFFFFFFFF))), (PCNTW GPR:$rs1)>;
+} // Predicates = [HasStdExtZbb, IsRV64]
Index: llvm/test/CodeGen/RISCV/rv32Zbb.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/RISCV/rv32Zbb.ll
@@ -0,0 +1,1188 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32I
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-b -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32IB
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbb -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32IBB
+
+define i32 @slo_i32(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: slo_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    not a0, a0
+; RV32I-NEXT:    sll a0, a0, a1
+; RV32I-NEXT:    not a0, a0
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: slo_i32:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    slo a0, a0, a1
+; RV32IB-NEXT:    ret
+;
+; RV32IBB-LABEL: slo_i32:
+; RV32IBB:       # %bb.0:
+; RV32IBB-NEXT:    slo a0, a0, a1
+; RV32IBB-NEXT:    ret
+  %neg = xor i32 %a, -1
+  %shl = shl i32 %neg, %b
+  %neg1 = xor i32 %shl, -1
+  ret i32 %neg1
+}
+
+define i64 @slo_i64(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: slo_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi a3, a2, -32
+; RV32I-NEXT:    not a0, a0
+; RV32I-NEXT:    bltz a3, .LBB1_2
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    mv a2, zero
+; RV32I-NEXT:    sll a1, a0, a3
+; RV32I-NEXT:    j .LBB1_3
+; RV32I-NEXT:  .LBB1_2:
+; RV32I-NEXT:    not a1, a1
+; RV32I-NEXT:    sll a1, a1, a2
+; RV32I-NEXT:    addi a3, zero, 31
+; RV32I-NEXT:    sub a3, a3, a2
+; RV32I-NEXT:    srli a4, a0, 1
+; RV32I-NEXT:    srl a3, a4, a3
+; RV32I-NEXT:    or a1, a1, a3
+; RV32I-NEXT:    sll a2, a0, a2
+; RV32I-NEXT:  .LBB1_3:
+; RV32I-NEXT:    not a1, a1
+; RV32I-NEXT:    not a0, a2
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: slo_i64:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    addi a3, a2, -32
+; RV32IB-NEXT:    not a0, a0
+; RV32IB-NEXT:    bltz a3, .LBB1_2
+; RV32IB-NEXT:  # %bb.1:
+; RV32IB-NEXT:    mv a2, zero
+; RV32IB-NEXT:    sll a1, a0, a3
+; RV32IB-NEXT:    j .LBB1_3
+; RV32IB-NEXT:  .LBB1_2:
+; RV32IB-NEXT:    not a1, a1
+; RV32IB-NEXT:    sll a1, a1, a2
+; RV32IB-NEXT:    addi a3, zero, 31
+; RV32IB-NEXT:    sub a3, a3, a2
+; RV32IB-NEXT:    srli a4, a0, 1
+; RV32IB-NEXT:    srl a3, a4, a3
+; RV32IB-NEXT:    or a1, a1, a3
+; RV32IB-NEXT:    sll a2, a0, a2
+; RV32IB-NEXT:  .LBB1_3:
+; RV32IB-NEXT:    not a1, a1
+;
RV32IB-NEXT: not a0, a2 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: slo_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: addi a3, a2, -32 +; RV32IBB-NEXT: not a0, a0 +; RV32IBB-NEXT: bltz a3, .LBB1_2 +; RV32IBB-NEXT: # %bb.1: +; RV32IBB-NEXT: mv a2, zero +; RV32IBB-NEXT: sll a1, a0, a3 +; RV32IBB-NEXT: j .LBB1_3 +; RV32IBB-NEXT: .LBB1_2: +; RV32IBB-NEXT: not a1, a1 +; RV32IBB-NEXT: sll a1, a1, a2 +; RV32IBB-NEXT: addi a3, zero, 31 +; RV32IBB-NEXT: sub a3, a3, a2 +; RV32IBB-NEXT: srli a4, a0, 1 +; RV32IBB-NEXT: srl a3, a4, a3 +; RV32IBB-NEXT: or a1, a1, a3 +; RV32IBB-NEXT: sll a2, a0, a2 +; RV32IBB-NEXT: .LBB1_3: +; RV32IBB-NEXT: not a1, a1 +; RV32IBB-NEXT: not a0, a2 +; RV32IBB-NEXT: ret + %neg = xor i64 %a, -1 + %shl = shl i64 %neg, %b + %neg1 = xor i64 %shl, -1 + ret i64 %neg1 +} + +define i32 @sro_i32(i32 %a, i32 %b) nounwind { +; RV32I-LABEL: sro_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: not a0, a0 +; RV32I-NEXT: srl a0, a0, a1 +; RV32I-NEXT: not a0, a0 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: sro_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: sro a0, a0, a1 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: sro_i32: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: sro a0, a0, a1 +; RV32IBB-NEXT: ret + %neg = xor i32 %a, -1 + %shr = lshr i32 %neg, %b + %neg1 = xor i32 %shr, -1 + ret i32 %neg1 +} + +define i64 @sro_i64(i64 %a, i64 %b) nounwind { +; RV32I-LABEL: sro_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi a3, a2, -32 +; RV32I-NEXT: not a1, a1 +; RV32I-NEXT: bltz a3, .LBB3_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a2, zero +; RV32I-NEXT: srl a0, a1, a3 +; RV32I-NEXT: j .LBB3_3 +; RV32I-NEXT: .LBB3_2: +; RV32I-NEXT: not a0, a0 +; RV32I-NEXT: srl a0, a0, a2 +; RV32I-NEXT: addi a3, zero, 31 +; RV32I-NEXT: sub a3, a3, a2 +; RV32I-NEXT: slli a4, a1, 1 +; RV32I-NEXT: sll a3, a4, a3 +; RV32I-NEXT: or a0, a0, a3 +; RV32I-NEXT: srl a2, a1, a2 +; RV32I-NEXT: .LBB3_3: +; RV32I-NEXT: not a0, a0 +; RV32I-NEXT: not a1, a2 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: sro_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: addi a3, a2, -32 +; RV32IB-NEXT: not a1, a1 +; RV32IB-NEXT: bltz a3, .LBB3_2 +; RV32IB-NEXT: # %bb.1: +; RV32IB-NEXT: mv a2, zero +; RV32IB-NEXT: srl a0, a1, a3 +; RV32IB-NEXT: j .LBB3_3 +; RV32IB-NEXT: .LBB3_2: +; RV32IB-NEXT: not a0, a0 +; RV32IB-NEXT: srl a0, a0, a2 +; RV32IB-NEXT: addi a3, zero, 31 +; RV32IB-NEXT: sub a3, a3, a2 +; RV32IB-NEXT: slli a4, a1, 1 +; RV32IB-NEXT: sll a3, a4, a3 +; RV32IB-NEXT: or a0, a0, a3 +; RV32IB-NEXT: srl a2, a1, a2 +; RV32IB-NEXT: .LBB3_3: +; RV32IB-NEXT: not a0, a0 +; RV32IB-NEXT: not a1, a2 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: sro_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: addi a3, a2, -32 +; RV32IBB-NEXT: not a1, a1 +; RV32IBB-NEXT: bltz a3, .LBB3_2 +; RV32IBB-NEXT: # %bb.1: +; RV32IBB-NEXT: mv a2, zero +; RV32IBB-NEXT: srl a0, a1, a3 +; RV32IBB-NEXT: j .LBB3_3 +; RV32IBB-NEXT: .LBB3_2: +; RV32IBB-NEXT: not a0, a0 +; RV32IBB-NEXT: srl a0, a0, a2 +; RV32IBB-NEXT: addi a3, zero, 31 +; RV32IBB-NEXT: sub a3, a3, a2 +; RV32IBB-NEXT: slli a4, a1, 1 +; RV32IBB-NEXT: sll a3, a4, a3 +; RV32IBB-NEXT: or a0, a0, a3 +; RV32IBB-NEXT: srl a2, a1, a2 +; RV32IBB-NEXT: .LBB3_3: +; RV32IBB-NEXT: not a0, a0 +; RV32IBB-NEXT: not a1, a2 +; RV32IBB-NEXT: ret + %neg = xor i64 %a, -1 + %shr = lshr i64 %neg, %b + %neg1 = xor i64 %shr, -1 + ret i64 %neg1 +} + +define i32 @sloi_i32(i32 %a) nounwind { +; RV32I-LABEL: sloi_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a0, a0, 1 +; RV32I-NEXT: ori a0, a0, 1 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: sloi_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: sloi a0, a0, 1 +; RV32IB-NEXT: 
ret +; +; RV32IBB-LABEL: sloi_i32: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: sloi a0, a0, 1 +; RV32IBB-NEXT: ret + %neg = shl i32 %a, 1 + %neg12 = or i32 %neg, 1 + ret i32 %neg12 +} + +define i64 @sloi_i64(i64 %a) nounwind { +; RV32I-LABEL: sloi_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: srli a2, a0, 31 +; RV32I-NEXT: slli a1, a1, 1 +; RV32I-NEXT: or a1, a1, a2 +; RV32I-NEXT: slli a0, a0, 1 +; RV32I-NEXT: ori a0, a0, 1 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: sloi_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: addi a2, zero, 1 +; RV32IB-NEXT: fsl a1, a1, a2, a0 +; RV32IB-NEXT: sloi a0, a0, 1 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: sloi_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: srli a2, a0, 31 +; RV32IBB-NEXT: slli a1, a1, 1 +; RV32IBB-NEXT: or a1, a1, a2 +; RV32IBB-NEXT: sloi a0, a0, 1 +; RV32IBB-NEXT: ret + %neg = shl i64 %a, 1 + %neg12 = or i64 %neg, 1 + ret i64 %neg12 +} + +define i32 @sroi_i32(i32 %a) nounwind { +; RV32I-LABEL: sroi_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: srli a0, a0, 1 +; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: sroi_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: sroi a0, a0, 1 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: sroi_i32: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: sroi a0, a0, 1 +; RV32IBB-NEXT: ret + %neg = lshr i32 %a, 1 + %neg12 = or i32 %neg, -2147483648 + ret i32 %neg12 +} + +define i64 @sroi_i64(i64 %a) nounwind { +; RV32I-LABEL: sroi_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a2, a1, 31 +; RV32I-NEXT: srli a0, a0, 1 +; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: srli a1, a1, 1 +; RV32I-NEXT: lui a2, 524288 +; RV32I-NEXT: or a1, a1, a2 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: sroi_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: addi a2, zero, 31 +; RV32IB-NEXT: fsl a0, a1, a2, a0 +; RV32IB-NEXT: sroi a1, a1, 1 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: sroi_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: slli a2, a1, 31 +; RV32IBB-NEXT: srli a0, a0, 1 +; RV32IBB-NEXT: or a0, a0, a2 +; RV32IBB-NEXT: sroi a1, a1, 1 +; RV32IBB-NEXT: ret + %neg = lshr i64 %a, 1 + %neg12 = or i64 %neg, -9223372036854775808 + ret i64 %neg12 +} + +declare i32 @llvm.ctlz.i32(i32, i1) + +define i32 @ctlz_i32(i32 %a) nounwind { +; RV32I-LABEL: ctlz_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: beqz a0, .LBB8_2 +; RV32I-NEXT: # %bb.1: # %cond.false +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: srli a1, a0, 2 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: srli a1, a0, 4 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: srli a1, a0, 8 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: not a0, a0 +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: lui a2, 349525 +; RV32I-NEXT: addi a2, a2, 1365 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: lui a1, 209715 +; RV32I-NEXT: addi a1, a1, 819 +; RV32I-NEXT: and a2, a0, a1 +; RV32I-NEXT: srli a0, a0, 2 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: add a0, a2, a0 +; RV32I-NEXT: srli a1, a0, 4 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: lui a1, 61681 +; RV32I-NEXT: addi a1, a1, -241 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: lui a1, 4112 +; RV32I-NEXT: addi a1, a1, 257 +; RV32I-NEXT: call __mulsi3 +; RV32I-NEXT: srli a0, a0, 24 +; RV32I-NEXT: j .LBB8_3 +; RV32I-NEXT: .LBB8_2: +; RV32I-NEXT: addi a0, zero, 32 +; RV32I-NEXT: .LBB8_3: # %cond.end +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: ctlz_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: 
beqz a0, .LBB8_2 +; RV32IB-NEXT: # %bb.1: # %cond.false +; RV32IB-NEXT: clz a0, a0 +; RV32IB-NEXT: ret +; RV32IB-NEXT: .LBB8_2: +; RV32IB-NEXT: addi a0, zero, 32 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: ctlz_i32: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: beqz a0, .LBB8_2 +; RV32IBB-NEXT: # %bb.1: # %cond.false +; RV32IBB-NEXT: clz a0, a0 +; RV32IBB-NEXT: ret +; RV32IBB-NEXT: .LBB8_2: +; RV32IBB-NEXT: addi a0, zero, 32 +; RV32IBB-NEXT: ret + %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false) + ret i32 %1 +} + +declare i64 @llvm.ctlz.i64(i64, i1) + +define i64 @ctlz_i64(i64 %a) nounwind { +; RV32I-LABEL: ctlz_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: sw ra, 28(sp) +; RV32I-NEXT: sw s0, 24(sp) +; RV32I-NEXT: sw s1, 20(sp) +; RV32I-NEXT: sw s2, 16(sp) +; RV32I-NEXT: sw s3, 12(sp) +; RV32I-NEXT: sw s4, 8(sp) +; RV32I-NEXT: sw s5, 4(sp) +; RV32I-NEXT: sw s6, 0(sp) +; RV32I-NEXT: mv s3, a1 +; RV32I-NEXT: mv s4, a0 +; RV32I-NEXT: srli a0, a1, 1 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: srli a1, a0, 2 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: srli a1, a0, 4 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: srli a1, a0, 8 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: not a0, a0 +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: lui a2, 349525 +; RV32I-NEXT: addi s5, a2, 1365 +; RV32I-NEXT: and a1, a1, s5 +; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: lui a1, 209715 +; RV32I-NEXT: addi s1, a1, 819 +; RV32I-NEXT: and a1, a0, s1 +; RV32I-NEXT: srli a0, a0, 2 +; RV32I-NEXT: and a0, a0, s1 +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: srli a1, a0, 4 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: lui a1, 61681 +; RV32I-NEXT: addi s6, a1, -241 +; RV32I-NEXT: and a0, a0, s6 +; RV32I-NEXT: lui a1, 4112 +; RV32I-NEXT: addi s0, a1, 257 +; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: call __mulsi3 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: srli a0, s4, 1 +; RV32I-NEXT: or a0, s4, a0 +; RV32I-NEXT: srli a1, a0, 2 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: srli a1, a0, 4 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: srli a1, a0, 8 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: not a0, a0 +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: and a1, a1, s5 +; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: and a1, a0, s1 +; RV32I-NEXT: srli a0, a0, 2 +; RV32I-NEXT: and a0, a0, s1 +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: srli a1, a0, 4 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: and a0, a0, s6 +; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: call __mulsi3 +; RV32I-NEXT: bnez s3, .LBB9_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: srli a0, a0, 24 +; RV32I-NEXT: addi a0, a0, 32 +; RV32I-NEXT: j .LBB9_3 +; RV32I-NEXT: .LBB9_2: +; RV32I-NEXT: srli a0, s2, 24 +; RV32I-NEXT: .LBB9_3: +; RV32I-NEXT: mv a1, zero +; RV32I-NEXT: lw s6, 0(sp) +; RV32I-NEXT: lw s5, 4(sp) +; RV32I-NEXT: lw s4, 8(sp) +; RV32I-NEXT: lw s3, 12(sp) +; RV32I-NEXT: lw s2, 16(sp) +; RV32I-NEXT: lw s1, 20(sp) +; RV32I-NEXT: lw s0, 24(sp) +; RV32I-NEXT: lw ra, 28(sp) +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: ctlz_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: bnez a1, .LBB9_2 +; RV32IB-NEXT: # %bb.1: +; RV32IB-NEXT: clz a0, a0 +; RV32IB-NEXT: addi a0, a0, 32 +; RV32IB-NEXT: mv a1, zero +; RV32IB-NEXT: ret +; RV32IB-NEXT: .LBB9_2: +; RV32IB-NEXT: clz a0, a1 +; RV32IB-NEXT: mv a1, zero +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: ctlz_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: bnez a1, .LBB9_2 +; RV32IBB-NEXT: # %bb.1: +; RV32IBB-NEXT: clz a0, a0 +; 
RV32IBB-NEXT: addi a0, a0, 32 +; RV32IBB-NEXT: mv a1, zero +; RV32IBB-NEXT: ret +; RV32IBB-NEXT: .LBB9_2: +; RV32IBB-NEXT: clz a0, a1 +; RV32IBB-NEXT: mv a1, zero +; RV32IBB-NEXT: ret + %1 = call i64 @llvm.ctlz.i64(i64 %a, i1 false) + ret i64 %1 +} + +declare i32 @llvm.cttz.i32(i32, i1) + +define i32 @cttz_i32(i32 %a) nounwind { +; RV32I-LABEL: cttz_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: beqz a0, .LBB10_2 +; RV32I-NEXT: # %bb.1: # %cond.false +; RV32I-NEXT: addi a1, a0, -1 +; RV32I-NEXT: not a0, a0 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: lui a2, 349525 +; RV32I-NEXT: addi a2, a2, 1365 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: lui a1, 209715 +; RV32I-NEXT: addi a1, a1, 819 +; RV32I-NEXT: and a2, a0, a1 +; RV32I-NEXT: srli a0, a0, 2 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: add a0, a2, a0 +; RV32I-NEXT: srli a1, a0, 4 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: lui a1, 61681 +; RV32I-NEXT: addi a1, a1, -241 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: lui a1, 4112 +; RV32I-NEXT: addi a1, a1, 257 +; RV32I-NEXT: call __mulsi3 +; RV32I-NEXT: srli a0, a0, 24 +; RV32I-NEXT: j .LBB10_3 +; RV32I-NEXT: .LBB10_2: +; RV32I-NEXT: addi a0, zero, 32 +; RV32I-NEXT: .LBB10_3: # %cond.end +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: cttz_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: beqz a0, .LBB10_2 +; RV32IB-NEXT: # %bb.1: # %cond.false +; RV32IB-NEXT: ctz a0, a0 +; RV32IB-NEXT: ret +; RV32IB-NEXT: .LBB10_2: +; RV32IB-NEXT: addi a0, zero, 32 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: cttz_i32: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: beqz a0, .LBB10_2 +; RV32IBB-NEXT: # %bb.1: # %cond.false +; RV32IBB-NEXT: ctz a0, a0 +; RV32IBB-NEXT: ret +; RV32IBB-NEXT: .LBB10_2: +; RV32IBB-NEXT: addi a0, zero, 32 +; RV32IBB-NEXT: ret + %1 = call i32 @llvm.cttz.i32(i32 %a, i1 false) + ret i32 %1 +} + +declare i64 @llvm.cttz.i64(i64, i1) + +define i64 @cttz_i64(i64 %a) nounwind { +; RV32I-LABEL: cttz_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: sw ra, 28(sp) +; RV32I-NEXT: sw s0, 24(sp) +; RV32I-NEXT: sw s1, 20(sp) +; RV32I-NEXT: sw s2, 16(sp) +; RV32I-NEXT: sw s3, 12(sp) +; RV32I-NEXT: sw s4, 8(sp) +; RV32I-NEXT: sw s5, 4(sp) +; RV32I-NEXT: sw s6, 0(sp) +; RV32I-NEXT: mv s3, a1 +; RV32I-NEXT: mv s4, a0 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: not a1, s4 +; RV32I-NEXT: and a0, a1, a0 +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: lui a2, 349525 +; RV32I-NEXT: addi s5, a2, 1365 +; RV32I-NEXT: and a1, a1, s5 +; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: lui a1, 209715 +; RV32I-NEXT: addi s0, a1, 819 +; RV32I-NEXT: and a1, a0, s0 +; RV32I-NEXT: srli a0, a0, 2 +; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: srli a1, a0, 4 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: lui a1, 61681 +; RV32I-NEXT: addi s6, a1, -241 +; RV32I-NEXT: and a0, a0, s6 +; RV32I-NEXT: lui a1, 4112 +; RV32I-NEXT: addi s1, a1, 257 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: call __mulsi3 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: addi a0, s3, -1 +; RV32I-NEXT: not a1, s3 +; RV32I-NEXT: and a0, a1, a0 +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: and a1, a1, s5 +; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: and a1, a0, s0 +; RV32I-NEXT: srli a0, a0, 2 +; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: srli a1, a0, 4 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: and a0, a0, s6 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: 
call __mulsi3 +; RV32I-NEXT: bnez s4, .LBB11_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: srli a0, a0, 24 +; RV32I-NEXT: addi a0, a0, 32 +; RV32I-NEXT: j .LBB11_3 +; RV32I-NEXT: .LBB11_2: +; RV32I-NEXT: srli a0, s2, 24 +; RV32I-NEXT: .LBB11_3: +; RV32I-NEXT: mv a1, zero +; RV32I-NEXT: lw s6, 0(sp) +; RV32I-NEXT: lw s5, 4(sp) +; RV32I-NEXT: lw s4, 8(sp) +; RV32I-NEXT: lw s3, 12(sp) +; RV32I-NEXT: lw s2, 16(sp) +; RV32I-NEXT: lw s1, 20(sp) +; RV32I-NEXT: lw s0, 24(sp) +; RV32I-NEXT: lw ra, 28(sp) +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: cttz_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: bnez a0, .LBB11_2 +; RV32IB-NEXT: # %bb.1: +; RV32IB-NEXT: ctz a0, a1 +; RV32IB-NEXT: addi a0, a0, 32 +; RV32IB-NEXT: mv a1, zero +; RV32IB-NEXT: ret +; RV32IB-NEXT: .LBB11_2: +; RV32IB-NEXT: ctz a0, a0 +; RV32IB-NEXT: mv a1, zero +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: cttz_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: bnez a0, .LBB11_2 +; RV32IBB-NEXT: # %bb.1: +; RV32IBB-NEXT: ctz a0, a1 +; RV32IBB-NEXT: addi a0, a0, 32 +; RV32IBB-NEXT: mv a1, zero +; RV32IBB-NEXT: ret +; RV32IBB-NEXT: .LBB11_2: +; RV32IBB-NEXT: ctz a0, a0 +; RV32IBB-NEXT: mv a1, zero +; RV32IBB-NEXT: ret + %1 = call i64 @llvm.cttz.i64(i64 %a, i1 false) + ret i64 %1 +} + +declare i32 @llvm.ctpop.i32(i32) + +define i32 @ctpop_i32(i32 %a) nounwind { +; RV32I-LABEL: ctpop_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: lui a2, 349525 +; RV32I-NEXT: addi a2, a2, 1365 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: lui a1, 209715 +; RV32I-NEXT: addi a1, a1, 819 +; RV32I-NEXT: and a2, a0, a1 +; RV32I-NEXT: srli a0, a0, 2 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: add a0, a2, a0 +; RV32I-NEXT: srli a1, a0, 4 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: lui a1, 61681 +; RV32I-NEXT: addi a1, a1, -241 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: lui a1, 4112 +; RV32I-NEXT: addi a1, a1, 257 +; RV32I-NEXT: call __mulsi3 +; RV32I-NEXT: srli a0, a0, 24 +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: ctpop_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: pcnt a0, a0 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: ctpop_i32: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: pcnt a0, a0 +; RV32IBB-NEXT: ret + %1 = call i32 @llvm.ctpop.i32(i32 %a) + ret i32 %1 +} + +declare i64 @llvm.ctpop.i64(i64) + +define i64 @ctpop_i64(i64 %a) nounwind { +; RV32I-LABEL: ctpop_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: sw ra, 28(sp) +; RV32I-NEXT: sw s0, 24(sp) +; RV32I-NEXT: sw s1, 20(sp) +; RV32I-NEXT: sw s2, 16(sp) +; RV32I-NEXT: sw s3, 12(sp) +; RV32I-NEXT: sw s4, 8(sp) +; RV32I-NEXT: sw s5, 4(sp) +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: srli a0, a1, 1 +; RV32I-NEXT: lui a2, 349525 +; RV32I-NEXT: addi s3, a2, 1365 +; RV32I-NEXT: and a0, a0, s3 +; RV32I-NEXT: sub a0, a1, a0 +; RV32I-NEXT: lui a1, 209715 +; RV32I-NEXT: addi s0, a1, 819 +; RV32I-NEXT: and a1, a0, s0 +; RV32I-NEXT: srli a0, a0, 2 +; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: srli a1, a0, 4 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: lui a1, 61681 +; RV32I-NEXT: addi s4, a1, -241 +; RV32I-NEXT: and a0, a0, s4 +; RV32I-NEXT: lui a1, 4112 +; RV32I-NEXT: addi s1, a1, 257 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: call __mulsi3 +; RV32I-NEXT: srli s5, a0, 24 +; RV32I-NEXT: srli a0, s2, 1 +; RV32I-NEXT: and a0, a0, s3 +; RV32I-NEXT: sub a0, s2, a0 +; RV32I-NEXT: and a1, a0, s0 +; RV32I-NEXT: srli a0, a0, 2 
+; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: srli a1, a0, 4 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: and a0, a0, s4 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: call __mulsi3 +; RV32I-NEXT: srli a0, a0, 24 +; RV32I-NEXT: add a0, a0, s5 +; RV32I-NEXT: mv a1, zero +; RV32I-NEXT: lw s5, 4(sp) +; RV32I-NEXT: lw s4, 8(sp) +; RV32I-NEXT: lw s3, 12(sp) +; RV32I-NEXT: lw s2, 16(sp) +; RV32I-NEXT: lw s1, 20(sp) +; RV32I-NEXT: lw s0, 24(sp) +; RV32I-NEXT: lw ra, 28(sp) +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: ctpop_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: pcnt a1, a1 +; RV32IB-NEXT: pcnt a0, a0 +; RV32IB-NEXT: add a0, a0, a1 +; RV32IB-NEXT: mv a1, zero +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: ctpop_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: pcnt a1, a1 +; RV32IBB-NEXT: pcnt a0, a0 +; RV32IBB-NEXT: add a0, a0, a1 +; RV32IBB-NEXT: mv a1, zero +; RV32IBB-NEXT: ret + %1 = call i64 @llvm.ctpop.i64(i64 %a) + ret i64 %1 +} + +define i32 @sextb_i32(i32 %a) nounwind { +; RV32I-LABEL: sextb_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: srai a0, a0, 24 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: sextb_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: sext.b a0, a0 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: sextb_i32: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: sext.b a0, a0 +; RV32IBB-NEXT: ret + %shl = shl i32 %a, 24 + %shr = ashr exact i32 %shl, 24 + ret i32 %shr +} + +define i64 @sextb_i64(i64 %a) nounwind { +; RV32I-LABEL: sextb_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a1, a0, 24 +; RV32I-NEXT: srai a0, a1, 24 +; RV32I-NEXT: srai a1, a1, 31 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: sextb_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: sext.b a2, a0 +; RV32IB-NEXT: slli a0, a0, 24 +; RV32IB-NEXT: srai a1, a0, 31 +; RV32IB-NEXT: mv a0, a2 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: sextb_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: sext.b a2, a0 +; RV32IBB-NEXT: slli a0, a0, 24 +; RV32IBB-NEXT: srai a1, a0, 31 +; RV32IBB-NEXT: mv a0, a2 +; RV32IBB-NEXT: ret + %shl = shl i64 %a, 56 + %shr = ashr exact i64 %shl, 56 + ret i64 %shr +} + +define i32 @sexth_i32(i32 %a) nounwind { +; RV32I-LABEL: sexth_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: srai a0, a0, 16 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: sexth_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: sext.h a0, a0 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: sexth_i32: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: sext.h a0, a0 +; RV32IBB-NEXT: ret + %shl = shl i32 %a, 16 + %shr = ashr exact i32 %shl, 16 + ret i32 %shr +} + +define i64 @sexth_i64(i64 %a) nounwind { +; RV32I-LABEL: sexth_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: srai a0, a1, 16 +; RV32I-NEXT: srai a1, a1, 31 +; RV32I-NEXT: ret +; +; RV32IB-LABEL: sexth_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: sext.h a2, a0 +; RV32IB-NEXT: slli a0, a0, 16 +; RV32IB-NEXT: srai a1, a0, 31 +; RV32IB-NEXT: mv a0, a2 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: sexth_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: sext.h a2, a0 +; RV32IBB-NEXT: slli a0, a0, 16 +; RV32IBB-NEXT: srai a1, a0, 31 +; RV32IBB-NEXT: mv a0, a2 +; RV32IBB-NEXT: ret + %shl = shl i64 %a, 48 + %shr = ashr exact i64 %shl, 48 + ret i64 %shr +} + +define i32 @min_i32(i32 %a, i32 %b) nounwind { +; RV32I-LABEL: min_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: blt a0, a1, .LBB18_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: .LBB18_2: +; RV32I-NEXT: ret +; +; RV32IB-LABEL: min_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: min a0, a0, a1 +; RV32IB-NEXT: ret +; +; 
RV32IBB-LABEL: min_i32: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: min a0, a0, a1 +; RV32IBB-NEXT: ret + %cmp = icmp slt i32 %a, %b + %cond = select i1 %cmp, i32 %a, i32 %b + ret i32 %cond +} + +define i64 @min_i64(i64 %a, i64 %b) nounwind { +; RV32I-LABEL: min_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: beq a1, a3, .LBB19_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: slt a4, a1, a3 +; RV32I-NEXT: beqz a4, .LBB19_3 +; RV32I-NEXT: j .LBB19_4 +; RV32I-NEXT: .LBB19_2: +; RV32I-NEXT: sltu a4, a0, a2 +; RV32I-NEXT: bnez a4, .LBB19_4 +; RV32I-NEXT: .LBB19_3: +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a1, a3 +; RV32I-NEXT: .LBB19_4: +; RV32I-NEXT: ret +; +; RV32IB-LABEL: min_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: beq a1, a3, .LBB19_2 +; RV32IB-NEXT: # %bb.1: +; RV32IB-NEXT: slt a4, a1, a3 +; RV32IB-NEXT: beqz a4, .LBB19_3 +; RV32IB-NEXT: j .LBB19_4 +; RV32IB-NEXT: .LBB19_2: +; RV32IB-NEXT: sltu a4, a0, a2 +; RV32IB-NEXT: bnez a4, .LBB19_4 +; RV32IB-NEXT: .LBB19_3: +; RV32IB-NEXT: mv a0, a2 +; RV32IB-NEXT: mv a1, a3 +; RV32IB-NEXT: .LBB19_4: +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: min_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: beq a1, a3, .LBB19_2 +; RV32IBB-NEXT: # %bb.1: +; RV32IBB-NEXT: slt a4, a1, a3 +; RV32IBB-NEXT: beqz a4, .LBB19_3 +; RV32IBB-NEXT: j .LBB19_4 +; RV32IBB-NEXT: .LBB19_2: +; RV32IBB-NEXT: sltu a4, a0, a2 +; RV32IBB-NEXT: bnez a4, .LBB19_4 +; RV32IBB-NEXT: .LBB19_3: +; RV32IBB-NEXT: mv a0, a2 +; RV32IBB-NEXT: mv a1, a3 +; RV32IBB-NEXT: .LBB19_4: +; RV32IBB-NEXT: ret + %cmp = icmp slt i64 %a, %b + %cond = select i1 %cmp, i64 %a, i64 %b + ret i64 %cond +} + +define i32 @max_i32(i32 %a, i32 %b) nounwind { +; RV32I-LABEL: max_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: blt a1, a0, .LBB20_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: .LBB20_2: +; RV32I-NEXT: ret +; +; RV32IB-LABEL: max_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: max a0, a0, a1 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: max_i32: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: max a0, a0, a1 +; RV32IBB-NEXT: ret + %cmp = icmp sgt i32 %a, %b + %cond = select i1 %cmp, i32 %a, i32 %b + ret i32 %cond +} + +define i64 @max_i64(i64 %a, i64 %b) nounwind { +; RV32I-LABEL: max_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: beq a1, a3, .LBB21_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: slt a4, a3, a1 +; RV32I-NEXT: beqz a4, .LBB21_3 +; RV32I-NEXT: j .LBB21_4 +; RV32I-NEXT: .LBB21_2: +; RV32I-NEXT: sltu a4, a2, a0 +; RV32I-NEXT: bnez a4, .LBB21_4 +; RV32I-NEXT: .LBB21_3: +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a1, a3 +; RV32I-NEXT: .LBB21_4: +; RV32I-NEXT: ret +; +; RV32IB-LABEL: max_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: beq a1, a3, .LBB21_2 +; RV32IB-NEXT: # %bb.1: +; RV32IB-NEXT: slt a4, a3, a1 +; RV32IB-NEXT: beqz a4, .LBB21_3 +; RV32IB-NEXT: j .LBB21_4 +; RV32IB-NEXT: .LBB21_2: +; RV32IB-NEXT: sltu a4, a2, a0 +; RV32IB-NEXT: bnez a4, .LBB21_4 +; RV32IB-NEXT: .LBB21_3: +; RV32IB-NEXT: mv a0, a2 +; RV32IB-NEXT: mv a1, a3 +; RV32IB-NEXT: .LBB21_4: +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: max_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: beq a1, a3, .LBB21_2 +; RV32IBB-NEXT: # %bb.1: +; RV32IBB-NEXT: slt a4, a3, a1 +; RV32IBB-NEXT: beqz a4, .LBB21_3 +; RV32IBB-NEXT: j .LBB21_4 +; RV32IBB-NEXT: .LBB21_2: +; RV32IBB-NEXT: sltu a4, a2, a0 +; RV32IBB-NEXT: bnez a4, .LBB21_4 +; RV32IBB-NEXT: .LBB21_3: +; RV32IBB-NEXT: mv a0, a2 +; RV32IBB-NEXT: mv a1, a3 +; RV32IBB-NEXT: .LBB21_4: +; RV32IBB-NEXT: ret + %cmp = icmp sgt i64 %a, %b + %cond = select i1 %cmp, i64 %a, i64 %b + ret i64 %cond +} + +define i32 @minu_i32(i32 %a, i32 %b) nounwind { +; 
RV32I-LABEL: minu_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: bltu a0, a1, .LBB22_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: .LBB22_2: +; RV32I-NEXT: ret +; +; RV32IB-LABEL: minu_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: minu a0, a0, a1 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: minu_i32: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: minu a0, a0, a1 +; RV32IBB-NEXT: ret + %cmp = icmp ult i32 %a, %b + %cond = select i1 %cmp, i32 %a, i32 %b + ret i32 %cond +} + +define i64 @minu_i64(i64 %a, i64 %b) nounwind { +; RV32I-LABEL: minu_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: beq a1, a3, .LBB23_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: sltu a4, a1, a3 +; RV32I-NEXT: beqz a4, .LBB23_3 +; RV32I-NEXT: j .LBB23_4 +; RV32I-NEXT: .LBB23_2: +; RV32I-NEXT: sltu a4, a0, a2 +; RV32I-NEXT: bnez a4, .LBB23_4 +; RV32I-NEXT: .LBB23_3: +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a1, a3 +; RV32I-NEXT: .LBB23_4: +; RV32I-NEXT: ret +; +; RV32IB-LABEL: minu_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: beq a1, a3, .LBB23_2 +; RV32IB-NEXT: # %bb.1: +; RV32IB-NEXT: sltu a4, a1, a3 +; RV32IB-NEXT: beqz a4, .LBB23_3 +; RV32IB-NEXT: j .LBB23_4 +; RV32IB-NEXT: .LBB23_2: +; RV32IB-NEXT: sltu a4, a0, a2 +; RV32IB-NEXT: bnez a4, .LBB23_4 +; RV32IB-NEXT: .LBB23_3: +; RV32IB-NEXT: mv a0, a2 +; RV32IB-NEXT: mv a1, a3 +; RV32IB-NEXT: .LBB23_4: +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: minu_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: beq a1, a3, .LBB23_2 +; RV32IBB-NEXT: # %bb.1: +; RV32IBB-NEXT: sltu a4, a1, a3 +; RV32IBB-NEXT: beqz a4, .LBB23_3 +; RV32IBB-NEXT: j .LBB23_4 +; RV32IBB-NEXT: .LBB23_2: +; RV32IBB-NEXT: sltu a4, a0, a2 +; RV32IBB-NEXT: bnez a4, .LBB23_4 +; RV32IBB-NEXT: .LBB23_3: +; RV32IBB-NEXT: mv a0, a2 +; RV32IBB-NEXT: mv a1, a3 +; RV32IBB-NEXT: .LBB23_4: +; RV32IBB-NEXT: ret + %cmp = icmp ult i64 %a, %b + %cond = select i1 %cmp, i64 %a, i64 %b + ret i64 %cond +} + +define i32 @maxu_i32(i32 %a, i32 %b) nounwind { +; RV32I-LABEL: maxu_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: bltu a1, a0, .LBB24_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: .LBB24_2: +; RV32I-NEXT: ret +; +; RV32IB-LABEL: maxu_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT: maxu a0, a0, a1 +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: maxu_i32: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: maxu a0, a0, a1 +; RV32IBB-NEXT: ret + %cmp = icmp ugt i32 %a, %b + %cond = select i1 %cmp, i32 %a, i32 %b + ret i32 %cond +} + +define i64 @maxu_i64(i64 %a, i64 %b) nounwind { +; RV32I-LABEL: maxu_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: beq a1, a3, .LBB25_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: sltu a4, a3, a1 +; RV32I-NEXT: beqz a4, .LBB25_3 +; RV32I-NEXT: j .LBB25_4 +; RV32I-NEXT: .LBB25_2: +; RV32I-NEXT: sltu a4, a2, a0 +; RV32I-NEXT: bnez a4, .LBB25_4 +; RV32I-NEXT: .LBB25_3: +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a1, a3 +; RV32I-NEXT: .LBB25_4: +; RV32I-NEXT: ret +; +; RV32IB-LABEL: maxu_i64: +; RV32IB: # %bb.0: +; RV32IB-NEXT: beq a1, a3, .LBB25_2 +; RV32IB-NEXT: # %bb.1: +; RV32IB-NEXT: sltu a4, a3, a1 +; RV32IB-NEXT: beqz a4, .LBB25_3 +; RV32IB-NEXT: j .LBB25_4 +; RV32IB-NEXT: .LBB25_2: +; RV32IB-NEXT: sltu a4, a2, a0 +; RV32IB-NEXT: bnez a4, .LBB25_4 +; RV32IB-NEXT: .LBB25_3: +; RV32IB-NEXT: mv a0, a2 +; RV32IB-NEXT: mv a1, a3 +; RV32IB-NEXT: .LBB25_4: +; RV32IB-NEXT: ret +; +; RV32IBB-LABEL: maxu_i64: +; RV32IBB: # %bb.0: +; RV32IBB-NEXT: beq a1, a3, .LBB25_2 +; RV32IBB-NEXT: # %bb.1: +; RV32IBB-NEXT: sltu a4, a3, a1 +; RV32IBB-NEXT: beqz a4, .LBB25_3 +; RV32IBB-NEXT: j .LBB25_4 +; RV32IBB-NEXT: .LBB25_2: +; RV32IBB-NEXT: sltu a4, a2, a0 +; 
RV32IBB-NEXT: bnez a4, .LBB25_4 +; RV32IBB-NEXT: .LBB25_3: +; RV32IBB-NEXT: mv a0, a2 +; RV32IBB-NEXT: mv a1, a3 +; RV32IBB-NEXT: .LBB25_4: +; RV32IBB-NEXT: ret + %cmp = icmp ugt i64 %a, %b + %cond = select i1 %cmp, i64 %a, i64 %b + ret i64 %cond +} Index: llvm/test/CodeGen/RISCV/rv64Zbb.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/RISCV/rv64Zbb.ll @@ -0,0 +1,1149 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64I +; RUN: llc -mtriple=riscv64 -mattr=+experimental-b -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64IB +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbb -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64IBB + +define signext i32 @slo_i32(i32 signext %a, i32 signext %b) nounwind { +; RV64I-LABEL: slo_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: not a0, a0 +; RV64I-NEXT: sllw a0, a0, a1 +; RV64I-NEXT: not a0, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: slo_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: slow a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: slo_i32: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: slow a0, a0, a1 +; RV64IBB-NEXT: ret + %neg = xor i32 %a, -1 + %shl = shl i32 %neg, %b + %neg1 = xor i32 %shl, -1 + ret i32 %neg1 +} + +define i64 @slo_i64(i64 %a, i64 %b) nounwind { +; RV64I-LABEL: slo_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: not a0, a0 +; RV64I-NEXT: sll a0, a0, a1 +; RV64I-NEXT: not a0, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: slo_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: slo a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: slo_i64: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: slo a0, a0, a1 +; RV64IBB-NEXT: ret + %neg = xor i64 %a, -1 + %shl = shl i64 %neg, %b + %neg1 = xor i64 %shl, -1 + ret i64 %neg1 +} + +define signext i32 @sro_i32(i32 signext %a, i32 signext %b) nounwind { +; RV64I-LABEL: sro_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: not a0, a0 +; RV64I-NEXT: srlw a0, a0, a1 +; RV64I-NEXT: not a0, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: sro_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: srow a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: sro_i32: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: srow a0, a0, a1 +; RV64IBB-NEXT: ret + %neg = xor i32 %a, -1 + %shr = lshr i32 %neg, %b + %neg1 = xor i32 %shr, -1 + ret i32 %neg1 +} + +define i64 @sro_i64(i64 %a, i64 %b) nounwind { +; RV64I-LABEL: sro_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: not a0, a0 +; RV64I-NEXT: srl a0, a0, a1 +; RV64I-NEXT: not a0, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: sro_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: sro a0, a0, a1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: sro_i64: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: sro a0, a0, a1 +; RV64IBB-NEXT: ret + %neg = xor i64 %a, -1 + %shr = lshr i64 %neg, %b + %neg1 = xor i64 %shr, -1 + ret i64 %neg1 +} + +define signext i32 @sloi_i32(i32 signext %a) nounwind { +; RV64I-LABEL: sloi_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 1 +; RV64I-NEXT: ori a0, a0, 1 +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: sloi_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: sloiw a0, a0, 1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: sloi_i32: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: sloiw a0, a0, 1 +; RV64IBB-NEXT: ret + %neg = shl i32 %a, 1 + %neg12 = or i32 %neg, 1 + ret i32 %neg12 +} + +define i64 @sloi_i64(i64 %a) nounwind { +; RV64I-LABEL: sloi_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 1 +; RV64I-NEXT: ori a0, a0, 1 +; RV64I-NEXT: ret 
+; +; RV64IB-LABEL: sloi_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: sloi a0, a0, 1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: sloi_i64: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: sloi a0, a0, 1 +; RV64IBB-NEXT: ret + %neg = shl i64 %a, 1 + %neg12 = or i64 %neg, 1 + ret i64 %neg12 +} + +define signext i32 @sroi_i32(i32 signext %a) nounwind { +; RV64I-LABEL: sroi_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: srli a0, a0, 1 +; RV64I-NEXT: lui a1, 524288 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: sroi_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: sroiw a0, a0, 1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: sroi_i32: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: sroiw a0, a0, 1 +; RV64IBB-NEXT: ret + %neg = lshr i32 %a, 1 + %neg12 = or i32 %neg, -2147483648 + ret i32 %neg12 +} + +define i64 @sroi_i64(i64 %a) nounwind { +; RV64I-LABEL: sroi_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: srli a0, a0, 1 +; RV64I-NEXT: addi a1, zero, -1 +; RV64I-NEXT: slli a1, a1, 63 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: sroi_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: sroi a0, a0, 1 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: sroi_i64: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: sroi a0, a0, 1 +; RV64IBB-NEXT: ret + %neg = lshr i64 %a, 1 + %neg12 = or i64 %neg, -9223372036854775808 + ret i64 %neg12 +} + +declare i32 @llvm.ctlz.i32(i32, i1) + +define signext i32 @ctlz_i32(i32 signext %a) nounwind { +; RV64I-LABEL: ctlz_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: beqz a0, .LBB8_2 +; RV64I-NEXT: # %bb.1: # %cond.false +; RV64I-NEXT: srliw a1, a0, 1 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 2 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 4 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 8 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 16 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 32 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: not a0, a0 +; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: lui a2, 21845 +; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 1365 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: lui a1, 13107 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 819 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 819 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 819 +; RV64I-NEXT: and a2, a0, a1 +; RV64I-NEXT: srli a0, a0, 2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: srli a1, a0, 4 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: lui a1, 3855 +; RV64I-NEXT: addiw a1, a1, 241 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, -241 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 241 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, -241 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: lui a1, 4112 +; RV64I-NEXT: addiw a1, a1, 257 +; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: srli a0, a0, 56 +; RV64I-NEXT: addi a0, a0, -32 +; RV64I-NEXT: j .LBB8_3 +; RV64I-NEXT: .LBB8_2: +; RV64I-NEXT: addi a0, zero, 32 +; RV64I-NEXT: .LBB8_3: # %cond.end +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; 
+; RV64IB-LABEL: ctlz_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: beqz a0, .LBB8_2 +; RV64IB-NEXT: # %bb.1: # %cond.false +; RV64IB-NEXT: clzw a0, a0 +; RV64IB-NEXT: ret +; RV64IB-NEXT: .LBB8_2: +; RV64IB-NEXT: addi a0, zero, 32 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: ctlz_i32: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: beqz a0, .LBB8_2 +; RV64IBB-NEXT: # %bb.1: # %cond.false +; RV64IBB-NEXT: clzw a0, a0 +; RV64IBB-NEXT: ret +; RV64IBB-NEXT: .LBB8_2: +; RV64IBB-NEXT: addi a0, zero, 32 +; RV64IBB-NEXT: ret + %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false) + ret i32 %1 +} + +declare i64 @llvm.ctlz.i64(i64, i1) + +define i64 @ctlz_i64(i64 %a) nounwind { +; RV64I-LABEL: ctlz_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: beqz a0, .LBB9_2 +; RV64I-NEXT: # %bb.1: # %cond.false +; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 2 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 4 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 8 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 16 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 32 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: not a0, a0 +; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: lui a2, 21845 +; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 1365 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: lui a1, 13107 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 819 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 819 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 819 +; RV64I-NEXT: and a2, a0, a1 +; RV64I-NEXT: srli a0, a0, 2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: srli a1, a0, 4 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: lui a1, 3855 +; RV64I-NEXT: addiw a1, a1, 241 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, -241 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 241 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, -241 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: lui a1, 4112 +; RV64I-NEXT: addiw a1, a1, 257 +; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: srli a0, a0, 56 +; RV64I-NEXT: j .LBB9_3 +; RV64I-NEXT: .LBB9_2: +; RV64I-NEXT: addi a0, zero, 64 +; RV64I-NEXT: .LBB9_3: # %cond.end +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: ctlz_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: beqz a0, .LBB9_2 +; RV64IB-NEXT: # %bb.1: # %cond.false +; RV64IB-NEXT: clz a0, a0 +; RV64IB-NEXT: ret +; RV64IB-NEXT: .LBB9_2: +; RV64IB-NEXT: addi a0, zero, 64 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: ctlz_i64: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: beqz a0, .LBB9_2 +; RV64IBB-NEXT: # %bb.1: # %cond.false +; RV64IBB-NEXT: clz a0, a0 +; RV64IBB-NEXT: ret +; RV64IBB-NEXT: .LBB9_2: +; RV64IBB-NEXT: addi a0, zero, 64 +; RV64IBB-NEXT: ret + %1 = call i64 @llvm.ctlz.i64(i64 %a, i1 false) + ret i64 %1 +} + +declare i32 @llvm.cttz.i32(i32, i1) + +define signext i32 @cttz_i32(i32 signext %a) nounwind { +; RV64I-LABEL: cttz_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: beqz a0, .LBB10_2 +; RV64I-NEXT: # %bb.1: # %cond.false 
+; RV64I-NEXT: addi a1, a0, -1 +; RV64I-NEXT: not a0, a0 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: lui a2, 21845 +; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 1365 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: lui a1, 13107 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 819 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 819 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 819 +; RV64I-NEXT: and a2, a0, a1 +; RV64I-NEXT: srli a0, a0, 2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: srli a1, a0, 4 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: lui a1, 3855 +; RV64I-NEXT: addiw a1, a1, 241 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, -241 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 241 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, -241 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: lui a1, 4112 +; RV64I-NEXT: addiw a1, a1, 257 +; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: srli a0, a0, 56 +; RV64I-NEXT: j .LBB10_3 +; RV64I-NEXT: .LBB10_2: +; RV64I-NEXT: addi a0, zero, 32 +; RV64I-NEXT: .LBB10_3: # %cond.end +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: cttz_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: beqz a0, .LBB10_2 +; RV64IB-NEXT: # %bb.1: # %cond.false +; RV64IB-NEXT: ctz a0, a0 +; RV64IB-NEXT: ret +; RV64IB-NEXT: .LBB10_2: +; RV64IB-NEXT: addi a0, zero, 32 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: cttz_i32: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: beqz a0, .LBB10_2 +; RV64IBB-NEXT: # %bb.1: # %cond.false +; RV64IBB-NEXT: ctz a0, a0 +; RV64IBB-NEXT: ret +; RV64IBB-NEXT: .LBB10_2: +; RV64IBB-NEXT: addi a0, zero, 32 +; RV64IBB-NEXT: ret + %1 = call i32 @llvm.cttz.i32(i32 %a, i1 false) + ret i32 %1 +} + +declare i64 @llvm.cttz.i64(i64, i1) + +define i64 @cttz_i64(i64 %a) nounwind { +; RV64I-LABEL: cttz_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: beqz a0, .LBB11_2 +; RV64I-NEXT: # %bb.1: # %cond.false +; RV64I-NEXT: addi a1, a0, -1 +; RV64I-NEXT: not a0, a0 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: lui a2, 21845 +; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 1365 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: lui a1, 13107 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 819 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 819 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 819 +; RV64I-NEXT: and a2, a0, a1 +; RV64I-NEXT: srli a0, a0, 2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: srli a1, a0, 4 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: lui a1, 3855 +; RV64I-NEXT: addiw a1, a1, 241 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, -241 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 241 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, -241 +; RV64I-NEXT: 
and a0, a0, a1 +; RV64I-NEXT: lui a1, 4112 +; RV64I-NEXT: addiw a1, a1, 257 +; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: srli a0, a0, 56 +; RV64I-NEXT: j .LBB11_3 +; RV64I-NEXT: .LBB11_2: +; RV64I-NEXT: addi a0, zero, 64 +; RV64I-NEXT: .LBB11_3: # %cond.end +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: cttz_i64: +; RV64IB: # %bb.0: +; RV64IB-NEXT: beqz a0, .LBB11_2 +; RV64IB-NEXT: # %bb.1: # %cond.false +; RV64IB-NEXT: ctz a0, a0 +; RV64IB-NEXT: ret +; RV64IB-NEXT: .LBB11_2: +; RV64IB-NEXT: addi a0, zero, 64 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: cttz_i64: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: beqz a0, .LBB11_2 +; RV64IBB-NEXT: # %bb.1: # %cond.false +; RV64IBB-NEXT: ctz a0, a0 +; RV64IBB-NEXT: ret +; RV64IBB-NEXT: .LBB11_2: +; RV64IBB-NEXT: addi a0, zero, 64 +; RV64IBB-NEXT: ret + %1 = call i64 @llvm.cttz.i64(i64 %a, i1 false) + ret i64 %1 +} + +declare i32 @llvm.ctpop.i32(i32) + +define signext i32 @ctpop_i32(i32 signext %a) nounwind { +; RV64I-LABEL: ctpop_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: srli a1, a1, 32 +; RV64I-NEXT: srliw a0, a0, 1 +; RV64I-NEXT: lui a2, 349525 +; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: sub a0, a1, a0 +; RV64I-NEXT: srli a1, a0, 2 +; RV64I-NEXT: lui a2, 13107 +; RV64I-NEXT: addiw a2, a2, 819 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 819 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 819 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 819 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 4 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: lui a1, 3855 +; RV64I-NEXT: addiw a1, a1, 241 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, -241 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 241 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, -241 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: lui a1, 4112 +; RV64I-NEXT: addiw a1, a1, 257 +; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: srli a0, a0, 56 +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: ctpop_i32: +; RV64IB: # %bb.0: +; RV64IB-NEXT: pcntw a0, a0 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: ctpop_i32: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: pcntw a0, a0 +; RV64IBB-NEXT: ret + %1 = call i32 @llvm.ctpop.i32(i32 %a) + ret i32 %1 +} + +declare i64 @llvm.ctpop.i64(i64) + +define i64 @ctpop_i64(i64 %a) nounwind { +; RV64I-LABEL: ctpop_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: lui a2, 21845 +; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 1365 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 1365 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: lui a1, 13107 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 819 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 819 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 819 +; RV64I-NEXT: and 
+define i64 @ctpop_i64(i64 %a) nounwind {
+; RV64I-LABEL: ctpop_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp)
+; RV64I-NEXT:    srli a1, a0, 1
+; RV64I-NEXT:    lui a2, 21845
+; RV64I-NEXT:    addiw a2, a2, 1365
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, 1365
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, 1365
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, 1365
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    sub a0, a0, a1
+; RV64I-NEXT:    lui a1, 13107
+; RV64I-NEXT:    addiw a1, a1, 819
+; RV64I-NEXT:    slli a1, a1, 12
+; RV64I-NEXT:    addi a1, a1, 819
+; RV64I-NEXT:    slli a1, a1, 12
+; RV64I-NEXT:    addi a1, a1, 819
+; RV64I-NEXT:    slli a1, a1, 12
+; RV64I-NEXT:    addi a1, a1, 819
+; RV64I-NEXT:    and a2, a0, a1
+; RV64I-NEXT:    srli a0, a0, 2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    add a0, a2, a0
+; RV64I-NEXT:    srli a1, a0, 4
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    lui a1, 3855
+; RV64I-NEXT:    addiw a1, a1, 241
+; RV64I-NEXT:    slli a1, a1, 12
+; RV64I-NEXT:    addi a1, a1, -241
+; RV64I-NEXT:    slli a1, a1, 12
+; RV64I-NEXT:    addi a1, a1, 241
+; RV64I-NEXT:    slli a1, a1, 12
+; RV64I-NEXT:    addi a1, a1, -241
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, 4112
+; RV64I-NEXT:    addiw a1, a1, 257
+; RV64I-NEXT:    slli a1, a1, 16
+; RV64I-NEXT:    addi a1, a1, 257
+; RV64I-NEXT:    slli a1, a1, 16
+; RV64I-NEXT:    addi a1, a1, 257
+; RV64I-NEXT:    call __muldi3
+; RV64I-NEXT:    srli a0, a0, 56
+; RV64I-NEXT:    ld ra, 8(sp)
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: ctpop_i64:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    pcnt a0, a0
+; RV64IB-NEXT:    ret
+;
+; RV64IBB-LABEL: ctpop_i64:
+; RV64IBB:       # %bb.0:
+; RV64IBB-NEXT:    pcnt a0, a0
+; RV64IBB-NEXT:    ret
+  %1 = call i64 @llvm.ctpop.i64(i64 %a)
+  ret i64 %1
+}
+
+define signext i32 @sextb_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: sextb_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 56
+; RV64I-NEXT:    srai a0, a0, 56
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: sextb_i32:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    sext.b a0, a0
+; RV64IB-NEXT:    ret
+;
+; RV64IBB-LABEL: sextb_i32:
+; RV64IBB:       # %bb.0:
+; RV64IBB-NEXT:    sext.b a0, a0
+; RV64IBB-NEXT:    ret
+  %shl = shl i32 %a, 24
+  %shr = ashr exact i32 %shl, 24
+  ret i32 %shr
+}
+
+define i64 @sextb_i64(i64 %a) nounwind {
+; RV64I-LABEL: sextb_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 56
+; RV64I-NEXT:    srai a0, a0, 56
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: sextb_i64:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    sext.b a0, a0
+; RV64IB-NEXT:    ret
+;
+; RV64IBB-LABEL: sextb_i64:
+; RV64IBB:       # %bb.0:
+; RV64IBB-NEXT:    sext.b a0, a0
+; RV64IBB-NEXT:    ret
+  %shl = shl i64 %a, 56
+  %shr = ashr exact i64 %shl, 56
+  ret i64 %shr
+}
+
+define signext i32 @sexth_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: sexth_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srai a0, a0, 48
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: sexth_i32:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    sext.h a0, a0
+; RV64IB-NEXT:    ret
+;
+; RV64IBB-LABEL: sexth_i32:
+; RV64IBB:       # %bb.0:
+; RV64IBB-NEXT:    sext.h a0, a0
+; RV64IBB-NEXT:    ret
+  %shl = shl i32 %a, 16
+  %shr = ashr exact i32 %shl, 16
+  ret i32 %shr
+}
+
+define i64 @sexth_i64(i64 %a) nounwind {
+; RV64I-LABEL: sexth_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srai a0, a0, 48
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: sexth_i64:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    sext.h a0, a0
+; RV64IB-NEXT:    ret
+;
+; RV64IBB-LABEL: sexth_i64:
+; RV64IBB:       # %bb.0:
+; RV64IBB-NEXT:    sext.h a0, a0
+; RV64IBB-NEXT:    ret
+  %shl = shl i64 %a, 48
+  %shr = ashr exact i64 %shl, 48
+  ret i64 %shr
+}
+
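+; The min/max/minu/maxu tests below match an icmp feeding a select of the
+; same operands; the base ISA needs a branch, while Zbb selects the
+; dedicated instruction.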
+define signext i32 @min_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: min_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    blt a0, a1, .LBB18_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:  .LBB18_2:
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: min_i32:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    min a0, a0, a1
+; RV64IB-NEXT:    ret
+;
+; RV64IBB-LABEL: min_i32:
+; RV64IBB:       # %bb.0:
+; RV64IBB-NEXT:    min a0, a0, a1
+; RV64IBB-NEXT:    ret
+  %cmp = icmp slt i32 %a, %b
+  %cond = select i1 %cmp, i32 %a, i32 %b
+  ret i32 %cond
+}
+
+define i64 @min_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: min_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    blt a0, a1, .LBB19_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:  .LBB19_2:
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: min_i64:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    min a0, a0, a1
+; RV64IB-NEXT:    ret
+;
+; RV64IBB-LABEL: min_i64:
+; RV64IBB:       # %bb.0:
+; RV64IBB-NEXT:    min a0, a0, a1
+; RV64IBB-NEXT:    ret
+  %cmp = icmp slt i64 %a, %b
+  %cond = select i1 %cmp, i64 %a, i64 %b
+  ret i64 %cond
+}
+
+define signext i32 @max_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: max_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    blt a1, a0, .LBB20_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:  .LBB20_2:
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: max_i32:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    max a0, a0, a1
+; RV64IB-NEXT:    ret
+;
+; RV64IBB-LABEL: max_i32:
+; RV64IBB:       # %bb.0:
+; RV64IBB-NEXT:    max a0, a0, a1
+; RV64IBB-NEXT:    ret
+  %cmp = icmp sgt i32 %a, %b
+  %cond = select i1 %cmp, i32 %a, i32 %b
+  ret i32 %cond
+}
+
+define i64 @max_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: max_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    blt a1, a0, .LBB21_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:  .LBB21_2:
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: max_i64:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    max a0, a0, a1
+; RV64IB-NEXT:    ret
+;
+; RV64IBB-LABEL: max_i64:
+; RV64IBB:       # %bb.0:
+; RV64IBB-NEXT:    max a0, a0, a1
+; RV64IBB-NEXT:    ret
+  %cmp = icmp sgt i64 %a, %b
+  %cond = select i1 %cmp, i64 %a, i64 %b
+  ret i64 %cond
+}
+
+define signext i32 @minu_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: minu_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    bltu a0, a1, .LBB22_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:  .LBB22_2:
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: minu_i32:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    minu a0, a0, a1
+; RV64IB-NEXT:    ret
+;
+; RV64IBB-LABEL: minu_i32:
+; RV64IBB:       # %bb.0:
+; RV64IBB-NEXT:    minu a0, a0, a1
+; RV64IBB-NEXT:    ret
+  %cmp = icmp ult i32 %a, %b
+  %cond = select i1 %cmp, i32 %a, i32 %b
+  ret i32 %cond
+}
+
+define i64 @minu_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: minu_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    bltu a0, a1, .LBB23_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:  .LBB23_2:
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: minu_i64:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    minu a0, a0, a1
+; RV64IB-NEXT:    ret
+;
+; RV64IBB-LABEL: minu_i64:
+; RV64IBB:       # %bb.0:
+; RV64IBB-NEXT:    minu a0, a0, a1
+; RV64IBB-NEXT:    ret
+  %cmp = icmp ult i64 %a, %b
+  %cond = select i1 %cmp, i64 %a, i64 %b
+  ret i64 %cond
+}
+
+define signext i32 @maxu_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: maxu_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    bltu a1, a0, .LBB24_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:  .LBB24_2:
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: maxu_i32:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    maxu a0, a0, a1
+; RV64IB-NEXT:    ret
+;
+; RV64IBB-LABEL: maxu_i32:
+; RV64IBB:       # %bb.0:
+; RV64IBB-NEXT:    maxu a0, a0, a1
+; RV64IBB-NEXT:    ret
+  %cmp = icmp ugt i32 %a, %b
+  %cond = select i1 %cmp, i32 %a, i32 %b
+  ret i32 %cond
+}
+
+define i64 @maxu_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: maxu_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    bltu a1, a0, .LBB25_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:  .LBB25_2:
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: maxu_i64:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    maxu a0, a0, a1
+; RV64IB-NEXT:    ret
+;
+; RV64IBB-LABEL: maxu_i64:
+; RV64IBB:       # %bb.0:
+; RV64IBB-NEXT:    maxu a0, a0, a1
+; RV64IBB-NEXT:    ret
+  %cmp = icmp ugt i64 %a, %b
+  %cond = select i1 %cmp, i64 %a, i64 %b
+  ret i64 %cond
+}
+
+; We select an i32 addi that zero-extends the result on RV64 as addiwu
+
+define zeroext i32 @zext_add_to_addiwu(i32 signext %a) nounwind {
+; RV64I-LABEL: zext_add_to_addiwu:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi a0, a0, 1
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: zext_add_to_addiwu:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    addiwu a0, a0, 1
+; RV64IB-NEXT:    ret
+;
+; RV64IBB-LABEL: zext_add_to_addiwu:
+; RV64IBB:       # %bb.0:
+; RV64IBB-NEXT:    addiwu a0, a0, 1
+; RV64IBB-NEXT:    ret
+  %add = add i32 %a, 1
+  ret i32 %add
+}
+
+define i64 @addiwu(i64 %a) nounwind {
+; RV64I-LABEL: addiwu:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi a0, a0, 1
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: addiwu:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    addiwu a0, a0, 1
+; RV64IB-NEXT:    ret
+;
+; RV64IBB-LABEL: addiwu:
+; RV64IBB:       # %bb.0:
+; RV64IBB-NEXT:    addiwu a0, a0, 1
+; RV64IBB-NEXT:    ret
+  %conv = add i64 %a, 1
+  %conv1 = and i64 %conv, 4294967295
+  ret i64 %conv1
+}
+
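+; The mask 8589934590 below is 0xffffffff << 1, i.e. the low 32 bits of %a
+; shifted left by the shift amount, so the shl+and pair selects to slliu.w.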
+define i64 @slliuw(i64 %a) nounwind {
+; RV64I-LABEL: slliuw:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 1
+; RV64I-NEXT:    addi a1, zero, 1
+; RV64I-NEXT:    slli a1, a1, 33
+; RV64I-NEXT:    addi a1, a1, -2
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: slliuw:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    slliu.w a0, a0, 1
+; RV64IB-NEXT:    ret
+;
+; RV64IBB-LABEL: slliuw:
+; RV64IBB:       # %bb.0:
+; RV64IBB-NEXT:    slliu.w a0, a0, 1
+; RV64IBB-NEXT:    ret
+  %conv1 = shl i64 %a, 1
+  %shl = and i64 %conv1, 8589934590
+  ret i64 %shl
+}
+
+; We select an i32 add that zero-extends the result on RV64 as addwu
+
+define zeroext i32 @zext_add_to_addwu(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: zext_add_to_addwu:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: zext_add_to_addwu:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    addwu a0, a0, a1
+; RV64IB-NEXT:    ret
+;
+; RV64IBB-LABEL: zext_add_to_addwu:
+; RV64IBB:       # %bb.0:
+; RV64IBB-NEXT:    addwu a0, a0, a1
+; RV64IBB-NEXT:    ret
+  %add = add i32 %a, %b
+  ret i32 %add
+}
+
+define i64 @addwu(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: addwu:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: addwu:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    addwu a0, a1, a0
+; RV64IB-NEXT:    ret
+;
+; RV64IBB-LABEL: addwu:
+; RV64IBB:       # %bb.0:
+; RV64IBB-NEXT:    addwu a0, a1, a0
+; RV64IBB-NEXT:    ret
+  %add = add i64 %b, %a
+  %conv1 = and i64 %add, 4294967295
+  ret i64 %conv1
+}
+
+; We select an i32 sub that zero-extends the result on RV64 as subwu
+
+define zeroext i32 @zext_sub_to_subwu(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: zext_sub_to_subwu:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sub a0, a0, a1
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: zext_sub_to_subwu:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    subwu a0, a0, a1
+; RV64IB-NEXT:    ret
+;
+; RV64IBB-LABEL: zext_sub_to_subwu:
+; RV64IBB:       # %bb.0:
+; RV64IBB-NEXT:    subwu a0, a0, a1
+; RV64IBB-NEXT:    ret
+  %sub = sub i32 %a, %b
+  ret i32 %sub
+}
+
+define i64 @subwu(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: subwu:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sub a0, a0, a1
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: subwu:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    subwu a0, a0, a1
+; RV64IB-NEXT:    ret
+;
+; RV64IBB-LABEL: subwu:
+; RV64IBB:       # %bb.0:
+; RV64IBB-NEXT:    subwu a0, a0, a1
+; RV64IBB-NEXT:    ret
+  %sub = sub i64 %a, %b
+  %conv1 = and i64 %sub, 4294967295
+  ret i64 %conv1
+}
+
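+; Unlike addwu/subwu above, which zero-extend the result, addu.w and subu.w
+; zero-extend the second source operand before the operation; this matches
+; masking %b with 4294967295 prior to the add/sub.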
+define i64 @adduw(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: adduw:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a1, 32
+; RV64I-NEXT:    srli a1, a1, 32
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: adduw:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    addu.w a0, a0, a1
+; RV64IB-NEXT:    ret
+;
+; RV64IBB-LABEL: adduw:
+; RV64IBB:       # %bb.0:
+; RV64IBB-NEXT:    addu.w a0, a0, a1
+; RV64IBB-NEXT:    ret
+  %and = and i64 %b, 4294967295
+  %add = add i64 %and, %a
+  ret i64 %add
+}
+
+define i64 @subuw(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: subuw:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a1, 32
+; RV64I-NEXT:    srli a1, a1, 32
+; RV64I-NEXT:    sub a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: subuw:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    subu.w a0, a0, a1
+; RV64IB-NEXT:    ret
+;
+; RV64IBB-LABEL: subuw:
+; RV64IBB:       # %bb.0:
+; RV64IBB-NEXT:    subu.w a0, a0, a1
+; RV64IBB-NEXT:    ret
+  %and = and i64 %b, 4294967295
+  %sub = sub i64 %a, %and
+  ret i64 %sub
+}