diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -44,6 +44,10 @@
     FTINT,
 
+    // Bit counting operations
+    CLZW,
+    CTZW,
+
     BSTRINS,
     BSTRPICK,
 
@@ -88,6 +92,8 @@
                           SelectionDAG &DAG) const override;
   SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
                     SmallVectorImpl<SDValue> &InVals) const override;
+  bool isCheapToSpeculateCttz() const override;
+  bool isCheapToSpeculateCtlz() const override;
 
 private:
   /// Target-specific function used to lower LoongArch calling conventions.
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -53,6 +53,7 @@
   setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom);
   setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom);
   setOperationAction(ISD::ROTL, GRLenVT, Expand);
+  setOperationAction(ISD::CTPOP, GRLenVT, Expand);
 
   setOperationAction({ISD::GlobalAddress, ISD::ConstantPool}, GRLenVT, Custom);
 
@@ -68,6 +69,8 @@
     setOperationAction(ISD::BITCAST, MVT::i32, Custom);
     setOperationAction(ISD::ROTR, MVT::i32, Custom);
     setOperationAction(ISD::ROTL, MVT::i32, Custom);
+    setOperationAction(ISD::CTTZ, MVT::i32, Custom);
+    setOperationAction(ISD::CTLZ, MVT::i32, Custom);
     if (Subtarget.hasBasicF() && !Subtarget.hasBasicD())
       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
   }
@@ -370,6 +373,10 @@
     return LoongArchISD::ROTR_W;
   case ISD::ROTL:
     return LoongArchISD::ROTL_W;
+  case ISD::CTTZ:
+    return LoongArchISD::CTZW;
+  case ISD::CTLZ:
+    return LoongArchISD::CLZW;
   }
 }
 
@@ -378,14 +385,26 @@
 // otherwise be promoted to i64, making it difficult to select the
 // SLL_W/.../*W later on because the fact that the operation was originally of
 // type i8/i16/i32 is lost.
-static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
+static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
                                    unsigned ExtOpc = ISD::ANY_EXTEND) {
   SDLoc DL(N);
   LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
-  SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
-  SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
-  SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
-  // ReplaceNodeResults requires we maintain the same type for the return value.
+  SDValue NewOp0, NewRes;
+
+  if (NumOp == 2) {
+    NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
+    SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
+    NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
+  } else if (NumOp == 1) {
+    NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
+    NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
+  } else {
+    // TODO: Handle other values of NumOp.
+    llvm_unreachable("Unexpected NumOp");
+  }
+
+  // ReplaceNodeResults requires we maintain the same type for the return
+  // value.
   return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
 }
 
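A minimal illustration, not part of the patch: for `cttz i32 %a` on LA64, where i32 is not a legal type, the function above builds roughly the following DAG (node numbering arbitrary).

//   t0: i64 = any_extend %a            // widen the i32 operand
//   t1: i64 = LoongArchISD::CTZW t0    // later selected to ctz.w
//   t2: i32 = truncate t1              // keep the node's original result type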
@@ -402,14 +421,14 @@
     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
            "Unexpected custom legalisation");
     if (N->getOperand(1).getOpcode() != ISD::Constant) {
-      Results.push_back(customLegalizeToWOp(N, DAG));
+      Results.push_back(customLegalizeToWOp(N, DAG, 2));
       break;
     }
     break;
   case ISD::ROTL:
     ConstantSDNode *CN;
     if ((CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))) {
-      Results.push_back(customLegalizeToWOp(N, DAG));
+      Results.push_back(customLegalizeToWOp(N, DAG, 2));
       break;
     }
     break;
@@ -466,6 +485,13 @@
     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
     break;
   }
+  case ISD::CTLZ:
+  case ISD::CTTZ: {
+    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+           "Unexpected custom legalisation");
+    Results.push_back(customLegalizeToWOp(N, DAG, 1));
+    break;
+  }
   }
 }
 
@@ -899,6 +925,8 @@
     NODE_NAME_CASE(REVB_2W)
     NODE_NAME_CASE(ROTR_W)
     NODE_NAME_CASE(ROTL_W)
+    NODE_NAME_CASE(CLZW)
+    NODE_NAME_CASE(CTZW)
   }
 #undef NODE_NAME_CASE
   return nullptr;
@@ -1635,3 +1663,7 @@
     return false;
   return (Imm.isZero() || Imm.isExactlyValue(+1.0));
 }
+
+bool LoongArchTargetLowering::isCheapToSpeculateCttz() const { return true; }
+
+bool LoongArchTargetLowering::isCheapToSpeculateCtlz() const { return true; }
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -59,6 +59,8 @@
   : SDNode<"LoongArchISD::BSTRPICK", SDT_LoongArchBStrPick>;
 def loongarch_revb_2h : SDNode<"LoongArchISD::REVB_2H", SDTUnaryOp>;
 def loongarch_revb_2w : SDNode<"LoongArchISD::REVB_2W", SDTUnaryOp>;
+def loongarch_clzw : SDNode<"LoongArchISD::CLZW", SDTIntBitCountUnaryOp>;
+def loongarch_ctzw : SDNode<"LoongArchISD::CTZW", SDTIntBitCountUnaryOp>;
 
 //===----------------------------------------------------------------------===//
 // Operand and SDNode transformation definitions.
@@ -589,6 +591,8 @@
   : Pat<(OpNode GPR:$rj, GPR:$rk), (Inst GPR:$rj, GPR:$rk)>;
 class PatGprGpr_32<SDPatternOperator OpNode, LAInst Inst>
   : Pat<(sext_inreg (OpNode GPR:$rj, GPR:$rk), i32), (Inst GPR:$rj, GPR:$rk)>;
+class PatGpr<SDPatternOperator OpNode, LAInst Inst>
+  : Pat<(OpNode GPR:$rj), (Inst GPR:$rj)>;
 class PatGprImm<SDPatternOperator OpNode, LAInst Inst, Operand ImmOpnd>
   : Pat<(OpNode GPR:$rj, ImmOpnd:$imm),
         (Inst GPR:$rj, ImmOpnd:$imm)>;
@@ -671,6 +675,14 @@
 def : Pat<(i64 (mul (loongarch_bstrpick GPR:$rj, (i64 31), (i64 0)),
                     (loongarch_bstrpick GPR:$rk, (i64 31), (i64 0)))),
           (MULW_D_WU GPR:$rj, GPR:$rk)>;
+def : PatGpr<ctlz, CLZ_D>;
+def : PatGpr<cttz, CTZ_D>;
+def : Pat<(ctlz (not GPR:$rj)), (CLO_D GPR:$rj)>;
+def : Pat<(cttz (not GPR:$rj)), (CTO_D GPR:$rj)>;
+def : PatGpr<loongarch_clzw, CLZ_W>;
+def : PatGpr<loongarch_ctzw, CTZ_W>;
+def : Pat<(loongarch_clzw (not GPR:$rj)), (CLO_W GPR:$rj)>;
+def : Pat<(loongarch_ctzw (not GPR:$rj)), (CTO_W GPR:$rj)>;
 } // Predicates = [IsLA64]
 
 def : PatGprGpr<and, AND>;
@@ -683,6 +695,10 @@
 def : Pat<(not (or GPR:$rj, GPR:$rk)), (NOR GPR:$rj, GPR:$rk)>;
 def : Pat<(or GPR:$rj, (not GPR:$rk)), (ORN GPR:$rj, GPR:$rk)>;
 def : Pat<(and GPR:$rj, (not GPR:$rk)), (ANDN GPR:$rj, GPR:$rk)>;
+def : PatGpr<ctlz, CLZ_W>;
+def : PatGpr<cttz, CTZ_W>;
+def : Pat<(ctlz (not GPR:$rj)), (CLO_W GPR:$rj)>;
+def : Pat<(cttz (not GPR:$rj)), (CTO_W GPR:$rj)>;
 
 /// FrameIndex calculations
 let Predicates = [IsLA32] in {
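A quick note on the CLO/CTO patterns above: counting the leading (or trailing) zeros of an inverted value is the same as counting the leading (or trailing) ones of the original value, which is exactly what clo.[wd] and cto.[wd] compute, so `(ctlz (not x))` and `(cttz (not x))` each fold to a single instruction. For example, x = 0xFFFFFF00 gives ~x = 0x000000FF and clz(~x) = 24, precisely the number of leading one bits in x.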
diff --git a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
@@ -0,0 +1,514 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32
+; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64
+
+declare i8 @llvm.ctlz.i8(i8, i1)
+declare i16 @llvm.ctlz.i16(i16, i1)
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i64 @llvm.ctlz.i64(i64, i1)
+declare i8 @llvm.ctpop.i8(i8)
+declare i16 @llvm.ctpop.i16(i16)
+declare i32 @llvm.ctpop.i32(i32)
+declare i64 @llvm.ctpop.i64(i64)
+declare i8 @llvm.cttz.i8(i8, i1)
+declare i16 @llvm.cttz.i16(i16, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
+declare i64 @llvm.cttz.i64(i64, i1)
+
+define i8 @test_ctlz_i8(i8 %a) nounwind {
+; LA32-LABEL: test_ctlz_i8:
+; LA32:       # %bb.0:
+; LA32-NEXT:    andi $a0, $a0, 255
+; LA32-NEXT:    clz.w $a0, $a0
+; LA32-NEXT:    addi.w $a0, $a0, -24
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_ctlz_i8:
+; LA64:       # %bb.0:
+; LA64-NEXT:    andi $a0, $a0, 255
+; LA64-NEXT:    clz.d $a0, $a0
+; LA64-NEXT:    addi.d $a0, $a0, -56
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %tmp = call i8 @llvm.ctlz.i8(i8 %a, i1 false)
+  ret i8 %tmp
+}
+
+define i16 @test_ctlz_i16(i16 %a) nounwind {
+; LA32-LABEL: test_ctlz_i16:
+; LA32:       # %bb.0:
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    clz.w $a0, $a0
+; LA32-NEXT:    addi.w $a0, $a0, -16
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_ctlz_i16:
+; LA64:       # %bb.0:
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    clz.d $a0, $a0
+; LA64-NEXT:    addi.d $a0, $a0, -48
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %tmp = call i16 @llvm.ctlz.i16(i16 %a, i1 false)
+  ret i16 %tmp
+}
+
+define i32 @test_ctlz_i32(i32 %a) nounwind {
+; LA32-LABEL: test_ctlz_i32:
+; LA32:       # %bb.0:
+; LA32-NEXT:    clz.w $a0, $a0
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_ctlz_i32:
+; LA64:       # %bb.0:
+; LA64-NEXT:    clz.w $a0, $a0
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %tmp = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
+  ret i32 %tmp
+}
+
+define i64 @test_ctlz_i64(i64 %a) nounwind {
+; LA32-LABEL: test_ctlz_i64:
+; LA32:       # %bb.0:
+; LA32-NEXT:    sltu $a2, $zero, $a1
+; LA32-NEXT:    clz.w $a1, $a1
+; LA32-NEXT:    maskeqz $a1, $a1, $a2
+; LA32-NEXT:    clz.w $a0, $a0
+; LA32-NEXT:    addi.w $a0, $a0, 32
+; LA32-NEXT:    masknez $a0, $a0, $a2
+; LA32-NEXT:    or $a0, $a1, $a0
+; LA32-NEXT:    move $a1, $zero
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_ctlz_i64:
+; LA64:       # %bb.0:
+; LA64-NEXT:    clz.d $a0, $a0
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %tmp = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
+  ret i64 %tmp
+}
+
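The LA32 sequence in test_ctlz_i64 is the standard two-word split: if the high word is nonzero the result is clz(hi), otherwise it is clz(lo) + 32, and maskeqz/masknez plus or select between the two arms without a branch. A minimal C sketch of the same computation (helper names are mine; clz32 models clz.w, which returns 32 for a zero input):

#include <stdint.h>

// clz32 models LoongArch clz.w: leading zero count, 32 when x == 0.
static unsigned clz32(uint32_t x) {
  unsigned n = 0;
  while (n < 32 && !(x & (0x80000000u >> n)))
    n++;
  return n;
}

unsigned ctlz64(uint32_t lo, uint32_t hi) {
  unsigned hi_nonzero = hi != 0;      // sltu $a2, $zero, $a1
  unsigned from_hi = clz32(hi);       // clz.w $a1, $a1
  unsigned from_lo = clz32(lo) + 32;  // clz.w $a0, $a0; addi.w $a0, $a0, 32
  // maskeqz/masknez zero out the unused arm; or merges the two.
  return hi_nonzero ? from_hi : from_lo;
}

For example, ctlz64(1, 0) takes the low arm: clz32(1) + 32 = 31 + 32 = 63.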
+define i8 @test_not_ctlz_i8(i8 %a) nounwind {
+; LA32-LABEL: test_not_ctlz_i8:
+; LA32:       # %bb.0:
+; LA32-NEXT:    ori $a1, $zero, 255
+; LA32-NEXT:    andn $a0, $a1, $a0
+; LA32-NEXT:    clz.w $a0, $a0
+; LA32-NEXT:    addi.w $a0, $a0, -24
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_not_ctlz_i8:
+; LA64:       # %bb.0:
+; LA64-NEXT:    ori $a1, $zero, 255
+; LA64-NEXT:    andn $a0, $a1, $a0
+; LA64-NEXT:    clz.d $a0, $a0
+; LA64-NEXT:    addi.d $a0, $a0, -56
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %neg = xor i8 %a, -1
+  %tmp = call i8 @llvm.ctlz.i8(i8 %neg, i1 false)
+  ret i8 %tmp
+}
+
+define i16 @test_not_ctlz_i16(i16 %a) nounwind {
+; LA32-LABEL: test_not_ctlz_i16:
+; LA32:       # %bb.0:
+; LA32-NEXT:    nor $a0, $a0, $zero
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    clz.w $a0, $a0
+; LA32-NEXT:    addi.w $a0, $a0, -16
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_not_ctlz_i16:
+; LA64:       # %bb.0:
+; LA64-NEXT:    nor $a0, $a0, $zero
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    clz.d $a0, $a0
+; LA64-NEXT:    addi.d $a0, $a0, -48
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %neg = xor i16 %a, -1
+  %tmp = call i16 @llvm.ctlz.i16(i16 %neg, i1 false)
+  ret i16 %tmp
+}
+
+define i32 @test_not_ctlz_i32(i32 %a) nounwind {
+; LA32-LABEL: test_not_ctlz_i32:
+; LA32:       # %bb.0:
+; LA32-NEXT:    clo.w $a0, $a0
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_not_ctlz_i32:
+; LA64:       # %bb.0:
+; LA64-NEXT:    clo.w $a0, $a0
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %neg = xor i32 %a, -1
+  %tmp = call i32 @llvm.ctlz.i32(i32 %neg, i1 false)
+  ret i32 %tmp
+}
+
+define i64 @test_not_ctlz_i64(i64 %a) nounwind {
+; LA32-LABEL: test_not_ctlz_i64:
+; LA32:       # %bb.0:
+; LA32-NEXT:    nor $a2, $a1, $zero
+; LA32-NEXT:    sltu $a2, $zero, $a2
+; LA32-NEXT:    clo.w $a0, $a0
+; LA32-NEXT:    addi.w $a0, $a0, 32
+; LA32-NEXT:    masknez $a0, $a0, $a2
+; LA32-NEXT:    clo.w $a1, $a1
+; LA32-NEXT:    maskeqz $a1, $a1, $a2
+; LA32-NEXT:    or $a0, $a1, $a0
+; LA32-NEXT:    move $a1, $zero
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_not_ctlz_i64:
+; LA64:       # %bb.0:
+; LA64-NEXT:    clo.d $a0, $a0
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %neg = xor i64 %a, -1
+  %tmp = call i64 @llvm.ctlz.i64(i64 %neg, i1 false)
+  ret i64 %tmp
+}
+
+define i8 @test_ctpop_i8(i8 %a) nounwind {
+; LA32-LABEL: test_ctpop_i8:
+; LA32:       # %bb.0:
+; LA32-NEXT:    srli.w $a1, $a0, 1
+; LA32-NEXT:    andi $a1, $a1, 85
+; LA32-NEXT:    sub.w $a0, $a0, $a1
+; LA32-NEXT:    andi $a1, $a0, 51
+; LA32-NEXT:    srli.w $a0, $a0, 2
+; LA32-NEXT:    andi $a0, $a0, 51
+; LA32-NEXT:    add.w $a0, $a1, $a0
+; LA32-NEXT:    srli.w $a1, $a0, 4
+; LA32-NEXT:    add.w $a0, $a0, $a1
+; LA32-NEXT:    andi $a0, $a0, 15
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_ctpop_i8:
+; LA64:       # %bb.0:
+; LA64-NEXT:    srli.d $a1, $a0, 1
+; LA64-NEXT:    andi $a1, $a1, 85
+; LA64-NEXT:    sub.d $a0, $a0, $a1
+; LA64-NEXT:    andi $a1, $a0, 51
+; LA64-NEXT:    srli.d $a0, $a0, 2
+; LA64-NEXT:    andi $a0, $a0, 51
+; LA64-NEXT:    add.d $a0, $a1, $a0
+; LA64-NEXT:    srli.d $a1, $a0, 4
+; LA64-NEXT:    add.d $a0, $a0, $a1
+; LA64-NEXT:    andi $a0, $a0, 15
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %1 = call i8 @llvm.ctpop.i8(i8 %a)
+  ret i8 %1
+}
+
+define i16 @test_ctpop_i16(i16 %a) nounwind {
+; LA32-LABEL: test_ctpop_i16:
+; LA32:       # %bb.0:
+; LA32-NEXT:    lu12i.w $a1, 5
+; LA32-NEXT:    ori $a1, $a1, 1365
+; LA32-NEXT:    srli.w $a2, $a0, 1
+; LA32-NEXT:    and $a1, $a2, $a1
+; LA32-NEXT:    sub.w $a0, $a0, $a1
+; LA32-NEXT:    lu12i.w $a1, 3
+; LA32-NEXT:    ori $a1, $a1, 819
+; LA32-NEXT:    and $a2, $a0, $a1
+; LA32-NEXT:    srli.w $a0, $a0, 2
+; LA32-NEXT:    and $a0, $a0, $a1
+; LA32-NEXT:    add.w $a0, $a2, $a0
+; LA32-NEXT:    srli.w $a1, $a0, 4
+; LA32-NEXT:    add.w $a0, $a0, $a1
+; LA32-NEXT:    bstrpick.w $a1, $a0, 11, 8
+; LA32-NEXT:    andi $a0, $a0, 15
+; LA32-NEXT:    add.w $a0, $a0, $a1
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_ctpop_i16:
+; LA64:       # %bb.0:
+; LA64-NEXT:    lu12i.w $a1, 5
+; LA64-NEXT:    ori $a1, $a1, 1365
+; LA64-NEXT:    srli.d $a2, $a0, 1
+; LA64-NEXT:    and $a1, $a2, $a1
+; LA64-NEXT:    sub.d $a0, $a0, $a1
+; LA64-NEXT:    lu12i.w $a1, 3
+; LA64-NEXT:    ori $a1, $a1, 819
+; LA64-NEXT:    and $a2, $a0, $a1
+; LA64-NEXT:    srli.d $a0, $a0, 2
+; LA64-NEXT:    and $a0, $a0, $a1
+; LA64-NEXT:    add.d $a0, $a2, $a0
+; LA64-NEXT:    srli.d $a1, $a0, 4
+; LA64-NEXT:    add.d $a0, $a0, $a1
+; LA64-NEXT:    bstrpick.d $a1, $a0, 11, 8
+; LA64-NEXT:    andi $a0, $a0, 15
+; LA64-NEXT:    add.d $a0, $a0, $a1
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %1 = call i16 @llvm.ctpop.i16(i16 %a)
+  ret i16 %1
+}
+
+define i32 @test_ctpop_i32(i32 %a) nounwind {
+; LA32-LABEL: test_ctpop_i32:
+; LA32:       # %bb.0:
+; LA32-NEXT:    lu12i.w $a1, 349525
+; LA32-NEXT:    ori $a1, $a1, 1365
+; LA32-NEXT:    srli.w $a2, $a0, 1
+; LA32-NEXT:    and $a1, $a2, $a1
+; LA32-NEXT:    sub.w $a0, $a0, $a1
+; LA32-NEXT:    lu12i.w $a1, 209715
+; LA32-NEXT:    ori $a1, $a1, 819
+; LA32-NEXT:    and $a2, $a0, $a1
+; LA32-NEXT:    srli.w $a0, $a0, 2
+; LA32-NEXT:    and $a0, $a0, $a1
+; LA32-NEXT:    add.w $a0, $a2, $a0
+; LA32-NEXT:    srli.w $a1, $a0, 4
+; LA32-NEXT:    add.w $a0, $a0, $a1
+; LA32-NEXT:    lu12i.w $a1, 61680
+; LA32-NEXT:    ori $a1, $a1, 3855
+; LA32-NEXT:    and $a0, $a0, $a1
+; LA32-NEXT:    lu12i.w $a1, 4112
+; LA32-NEXT:    ori $a1, $a1, 257
+; LA32-NEXT:    mul.w $a0, $a0, $a1
+; LA32-NEXT:    srli.w $a0, $a0, 24
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_ctpop_i32:
+; LA64:       # %bb.0:
+; LA64-NEXT:    lu12i.w $a1, 349525
+; LA64-NEXT:    ori $a1, $a1, 1365
+; LA64-NEXT:    srli.d $a2, $a0, 1
+; LA64-NEXT:    and $a1, $a2, $a1
+; LA64-NEXT:    sub.d $a0, $a0, $a1
+; LA64-NEXT:    lu12i.w $a1, 209715
+; LA64-NEXT:    ori $a1, $a1, 819
+; LA64-NEXT:    and $a2, $a0, $a1
+; LA64-NEXT:    srli.d $a0, $a0, 2
+; LA64-NEXT:    and $a0, $a0, $a1
+; LA64-NEXT:    add.d $a0, $a2, $a0
+; LA64-NEXT:    srli.d $a1, $a0, 4
+; LA64-NEXT:    add.d $a0, $a0, $a1
+; LA64-NEXT:    lu12i.w $a1, 61680
+; LA64-NEXT:    ori $a1, $a1, 3855
+; LA64-NEXT:    and $a0, $a0, $a1
+; LA64-NEXT:    lu12i.w $a1, 4112
+; LA64-NEXT:    ori $a1, $a1, 257
+; LA64-NEXT:    mul.d $a0, $a0, $a1
+; LA64-NEXT:    bstrpick.d $a0, $a0, 31, 24
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %1 = call i32 @llvm.ctpop.i32(i32 %a)
+  ret i32 %1
+}
+
+define i64 @test_ctpop_i64(i64 %a) nounwind {
+; LA32-LABEL: test_ctpop_i64:
+; LA32:       # %bb.0:
+; LA32-NEXT:    lu12i.w $a2, 349525
+; LA32-NEXT:    ori $a2, $a2, 1365
+; LA32-NEXT:    srli.w $a3, $a0, 1
+; LA32-NEXT:    and $a3, $a3, $a2
+; LA32-NEXT:    sub.w $a0, $a0, $a3
+; LA32-NEXT:    lu12i.w $a3, 209715
+; LA32-NEXT:    ori $a3, $a3, 819
+; LA32-NEXT:    and $a4, $a0, $a3
+; LA32-NEXT:    srli.w $a0, $a0, 2
+; LA32-NEXT:    and $a0, $a0, $a3
+; LA32-NEXT:    add.w $a0, $a4, $a0
+; LA32-NEXT:    srli.w $a4, $a1, 1
+; LA32-NEXT:    and $a2, $a4, $a2
+; LA32-NEXT:    sub.w $a1, $a1, $a2
+; LA32-NEXT:    srli.w $a2, $a0, 4
+; LA32-NEXT:    add.w $a0, $a0, $a2
+; LA32-NEXT:    and $a2, $a1, $a3
+; LA32-NEXT:    srli.w $a1, $a1, 2
+; LA32-NEXT:    and $a1, $a1, $a3
+; LA32-NEXT:    add.w $a1, $a2, $a1
+; LA32-NEXT:    srli.w $a2, $a1, 4
+; LA32-NEXT:    add.w $a1, $a1, $a2
+; LA32-NEXT:    lu12i.w $a2, 61680
+; LA32-NEXT:    ori $a2, $a2, 3855
+; LA32-NEXT:    and $a1, $a1, $a2
+; LA32-NEXT:    and $a0, $a0, $a2
+; LA32-NEXT:    lu12i.w $a2, 4112
+; LA32-NEXT:    ori $a2, $a2, 257
+; LA32-NEXT:    mul.w $a0, $a0, $a2
+; LA32-NEXT:    mul.w $a1, $a1, $a2
+; LA32-NEXT:    srli.w $a1, $a1, 24
+; LA32-NEXT:    srli.w $a0, $a0, 24
+; LA32-NEXT:    add.w $a0, $a0, $a1
+; LA32-NEXT:    move $a1, $zero
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_ctpop_i64:
+; LA64:       # %bb.0:
+; LA64-NEXT:    lu12i.w $a1, 349525
+; LA64-NEXT:    ori $a1, $a1, 1365
+; LA64-NEXT:    lu32i.d $a1, 349525
+; LA64-NEXT:    lu52i.d $a1, $a1, 1365
+; LA64-NEXT:    srli.d $a2, $a0, 1
+; LA64-NEXT:    and $a1, $a2, $a1
+; LA64-NEXT:    sub.d $a0, $a0, $a1
+; LA64-NEXT:    lu12i.w $a1, 209715
+; LA64-NEXT:    ori $a1, $a1, 819
+; LA64-NEXT:    lu32i.d $a1, 209715
+; LA64-NEXT:    lu52i.d $a1, $a1, 819
+; LA64-NEXT:    and $a2, $a0, $a1
+; LA64-NEXT:    srli.d $a0, $a0, 2
+; LA64-NEXT:    and $a0, $a0, $a1
+; LA64-NEXT:    add.d $a0, $a2, $a0
+; LA64-NEXT:    srli.d $a1, $a0, 4
+; LA64-NEXT:    add.d $a0, $a0, $a1
+; LA64-NEXT:    lu12i.w $a1, 61680
+; LA64-NEXT:    ori $a1, $a1, 3855
+; LA64-NEXT:    lu32i.d $a1, -61681
+; LA64-NEXT:    lu52i.d $a1, $a1, 240
+; LA64-NEXT:    and $a0, $a0, $a1
+; LA64-NEXT:    lu12i.w $a1, 4112
+; LA64-NEXT:    ori $a1, $a1, 257
+; LA64-NEXT:    lu32i.d $a1, 65793
+; LA64-NEXT:    lu52i.d $a1, $a1, 16
+; LA64-NEXT:    mul.d $a0, $a0, $a1
+; LA64-NEXT:    srli.d $a0, $a0, 56
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %1 = call i64 @llvm.ctpop.i64(i64 %a)
+  ret i64 %1
+}
+
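The ctpop bodies above come from SelectionDAG's generic expansion, since the patch marks ISD::CTPOP as Expand rather than adding an instruction. A rough C rendering of the 32-bit case, using the same mask constants the lu12i.w/ori pairs materialize (illustrative only; the i8/i16 variants use correspondingly truncated masks, and LA32 handles i64 as two 32-bit counts whose results are added):

#include <stdint.h>

// Generic SWAR popcount: 0x55555555, 0x33333333, 0x0f0f0f0f and 0x01010101
// match the constants materialized in the checks above.
uint32_t popcount32(uint32_t x) {
  x = x - ((x >> 1) & 0x55555555u);                 // 2-bit partial sums
  x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u); // 4-bit partial sums
  x = (x + (x >> 4)) & 0x0f0f0f0fu;                 // 8-bit partial sums
  return (x * 0x01010101u) >> 24;                   // total lands in the top byte
}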
+define i8 @test_cttz_i8(i8 %a) nounwind {
+; LA32-LABEL: test_cttz_i8:
+; LA32:       # %bb.0:
+; LA32-NEXT:    ori $a0, $a0, 256
+; LA32-NEXT:    ctz.w $a0, $a0
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_cttz_i8:
+; LA64:       # %bb.0:
+; LA64-NEXT:    ori $a0, $a0, 256
+; LA64-NEXT:    ctz.d $a0, $a0
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %tmp = call i8 @llvm.cttz.i8(i8 %a, i1 false)
+  ret i8 %tmp
+}
+
+define i16 @test_cttz_i16(i16 %a) nounwind {
+; LA32-LABEL: test_cttz_i16:
+; LA32:       # %bb.0:
+; LA32-NEXT:    lu12i.w $a1, 16
+; LA32-NEXT:    or $a0, $a0, $a1
+; LA32-NEXT:    ctz.w $a0, $a0
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_cttz_i16:
+; LA64:       # %bb.0:
+; LA64-NEXT:    lu12i.w $a1, 16
+; LA64-NEXT:    or $a0, $a0, $a1
+; LA64-NEXT:    ctz.d $a0, $a0
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %tmp = call i16 @llvm.cttz.i16(i16 %a, i1 false)
+  ret i16 %tmp
+}
+
+define i32 @test_cttz_i32(i32 %a) nounwind {
+; LA32-LABEL: test_cttz_i32:
+; LA32:       # %bb.0:
+; LA32-NEXT:    ctz.w $a0, $a0
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_cttz_i32:
+; LA64:       # %bb.0:
+; LA64-NEXT:    ctz.w $a0, $a0
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %tmp = call i32 @llvm.cttz.i32(i32 %a, i1 false)
+  ret i32 %tmp
+}
+
+define i64 @test_cttz_i64(i64 %a) nounwind {
+; LA32-LABEL: test_cttz_i64:
+; LA32:       # %bb.0:
+; LA32-NEXT:    sltu $a2, $zero, $a0
+; LA32-NEXT:    ctz.w $a0, $a0
+; LA32-NEXT:    maskeqz $a0, $a0, $a2
+; LA32-NEXT:    ctz.w $a1, $a1
+; LA32-NEXT:    addi.w $a1, $a1, 32
+; LA32-NEXT:    masknez $a1, $a1, $a2
+; LA32-NEXT:    or $a0, $a0, $a1
+; LA32-NEXT:    move $a1, $zero
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_cttz_i64:
+; LA64:       # %bb.0:
+; LA64-NEXT:    ctz.d $a0, $a0
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %tmp = call i64 @llvm.cttz.i64(i64 %a, i1 false)
+  ret i64 %tmp
+}
+
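Note the trick in the i8/i16 cttz tests above: instead of a zero check, a sentinel bit is OR-ed in just above the value's width (ori $a0, $a0, 256 for i8; lu12i.w $a1, 16, i.e. 0x10000, for i16). The low bits are unchanged, and ctz can never scan past the sentinel, so a zero input yields exactly 8 or 16, the defined cttz result when the is-zero-poison flag is false.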
+define i8 @test_not_cttz_i8(i8 %a) nounwind {
+; LA32-LABEL: test_not_cttz_i8:
+; LA32:       # %bb.0:
+; LA32-NEXT:    ori $a1, $zero, 256
+; LA32-NEXT:    orn $a0, $a1, $a0
+; LA32-NEXT:    ctz.w $a0, $a0
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_not_cttz_i8:
+; LA64:       # %bb.0:
+; LA64-NEXT:    ori $a1, $zero, 256
+; LA64-NEXT:    orn $a0, $a1, $a0
+; LA64-NEXT:    ctz.d $a0, $a0
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %neg = xor i8 %a, -1
+  %tmp = call i8 @llvm.cttz.i8(i8 %neg, i1 false)
+  ret i8 %tmp
+}
+
+define i16 @test_not_cttz_i16(i16 %a) nounwind {
+; LA32-LABEL: test_not_cttz_i16:
+; LA32:       # %bb.0:
+; LA32-NEXT:    lu12i.w $a1, 16
+; LA32-NEXT:    orn $a0, $a1, $a0
+; LA32-NEXT:    ctz.w $a0, $a0
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_not_cttz_i16:
+; LA64:       # %bb.0:
+; LA64-NEXT:    lu12i.w $a1, 16
+; LA64-NEXT:    orn $a0, $a1, $a0
+; LA64-NEXT:    ctz.d $a0, $a0
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %neg = xor i16 %a, -1
+  %tmp = call i16 @llvm.cttz.i16(i16 %neg, i1 false)
+  ret i16 %tmp
+}
+
+define i32 @test_not_cttz_i32(i32 %a) nounwind {
+; LA32-LABEL: test_not_cttz_i32:
+; LA32:       # %bb.0:
+; LA32-NEXT:    cto.w $a0, $a0
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_not_cttz_i32:
+; LA64:       # %bb.0:
+; LA64-NEXT:    cto.w $a0, $a0
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %neg = xor i32 %a, -1
+  %tmp = call i32 @llvm.cttz.i32(i32 %neg, i1 false)
+  ret i32 %tmp
+}
+
+define i64 @test_not_cttz_i64(i64 %a) nounwind {
+; LA32-LABEL: test_not_cttz_i64:
+; LA32:       # %bb.0:
+; LA32-NEXT:    nor $a2, $a0, $zero
+; LA32-NEXT:    sltu $a2, $zero, $a2
+; LA32-NEXT:    cto.w $a1, $a1
+; LA32-NEXT:    addi.w $a1, $a1, 32
+; LA32-NEXT:    masknez $a1, $a1, $a2
+; LA32-NEXT:    cto.w $a0, $a0
+; LA32-NEXT:    maskeqz $a0, $a0, $a2
+; LA32-NEXT:    or $a0, $a0, $a1
+; LA32-NEXT:    move $a1, $zero
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_not_cttz_i64:
+; LA64:       # %bb.0:
+; LA64-NEXT:    cto.d $a0, $a0
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %neg = xor i64 %a, -1
+  %tmp = call i64 @llvm.cttz.i64(i64 %neg, i1 false)
+  ret i64 %tmp
+}