diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -231,6 +231,12 @@
   UMIN_VL,
   UMAX_VL,
 
+  BITREVERSE_VL,
+  BSWAP_VL,
+  CTLZ_VL,
+  CTTZ_VL,
+  CTPOP_VL,
+
   SADDSAT_VL,
   UADDSAT_VL,
   SSUBSAT_VL,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -681,11 +681,7 @@
       setOperationAction({ISD::ROTL, ISD::ROTR}, VT, Expand);
-      setOperationAction({ISD::VP_BSWAP, ISD::VP_BITREVERSE}, VT, Expand);
       setOperationAction({ISD::VP_FSHL, ISD::VP_FSHR}, VT, Expand);
-      setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
-                          ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
-                         VT, Expand);
 
       // Custom-lower extensions and truncations from/to mask types.
       setOperationAction({ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND},
@@ -753,9 +749,17 @@
       if (Subtarget.hasStdExtZvbb()) {
         setOperationAction({ISD::BITREVERSE, ISD::BSWAP}, VT, Legal);
+        setOperationAction({ISD::VP_BITREVERSE, ISD::VP_BSWAP}, VT, Custom);
+        setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
+                            ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
+                           VT, Custom);
       } else {
         setOperationAction({ISD::BITREVERSE, ISD::BSWAP}, VT, Expand);
-        setOperationAction({ISD::CTTZ, ISD::CTLZ, ISD::CTPOP}, VT, Expand);
+        setOperationAction({ISD::VP_BITREVERSE, ISD::VP_BSWAP}, VT, Expand);
+        setOperationAction({ISD::CTLZ, ISD::CTTZ, ISD::CTPOP}, VT, Expand);
+        setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
+                            ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
+                           VT, Expand);
 
         // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
         // range of f32.
@@ -5430,6 +5434,18 @@
     return lowerVPOp(Op, DAG, RISCVISD::UMIN_VL, /*HasMergeOp*/ true);
   case ISD::VP_UMAX:
     return lowerVPOp(Op, DAG, RISCVISD::UMAX_VL, /*HasMergeOp*/ true);
+  case ISD::VP_BITREVERSE:
+    return lowerVPOp(Op, DAG, RISCVISD::BITREVERSE_VL, /*HasMergeOp*/ true);
+  case ISD::VP_BSWAP:
+    return lowerVPOp(Op, DAG, RISCVISD::BSWAP_VL, /*HasMergeOp*/ true);
+  case ISD::VP_CTLZ:
+  case ISD::VP_CTLZ_ZERO_UNDEF:
+    return lowerVPOp(Op, DAG, RISCVISD::CTLZ_VL, /*HasMergeOp*/ true);
+  case ISD::VP_CTTZ:
+  case ISD::VP_CTTZ_ZERO_UNDEF:
+    return lowerVPOp(Op, DAG, RISCVISD::CTTZ_VL, /*HasMergeOp*/ true);
+  case ISD::VP_CTPOP:
+    return lowerVPOp(Op, DAG, RISCVISD::CTPOP_VL, /*HasMergeOp*/ true);
   case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
     return lowerVPStridedLoad(Op, DAG);
   case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
@@ -16054,6 +16070,11 @@
   NODE_NAME_CASE(SMAX_VL)
   NODE_NAME_CASE(UMIN_VL)
   NODE_NAME_CASE(UMAX_VL)
+  NODE_NAME_CASE(BITREVERSE_VL)
+  NODE_NAME_CASE(BSWAP_VL)
+  NODE_NAME_CASE(CTLZ_VL)
+  NODE_NAME_CASE(CTTZ_VL)
+  NODE_NAME_CASE(CTPOP_VL)
   NODE_NAME_CASE(FMINNUM_VL)
   NODE_NAME_CASE(FMAXNUM_VL)
   NODE_NAME_CASE(MULHS_VL)
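Note on the lowering above: both ISD::VP_CTLZ and ISD::VP_CTLZ_ZERO_UNDEF are routed to the single RISCVISD::CTLZ_VL node (and likewise for the cttz pair), which works because the Zvbb vclz.v/vctz.v instructions produce a well-defined result (SEW) for a zero input, so the zero-undef variants need no separate handling. A minimal IR sketch of what the new Custom path consumes — the function name and element type here are illustrative, not taken from the patch:

declare <vscale x 2 x i32> @llvm.vp.ctlz.nxv2i32(<vscale x 2 x i32>, i1 immarg, <vscale x 2 x i1>, i32)

define <vscale x 2 x i32> @ctlz_zvbb_demo(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
  ; With -mattr=+v,+experimental-zvbb this should select a vsetvli plus a single
  ; masked vclz.v rather than the multi-instruction bit-twiddling expansion.
  %v = call <vscale x 2 x i32> @llvm.vp.ctlz.nxv2i32(<vscale x 2 x i32> %va, i1 false, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i32> %v
}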
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -21,6 +21,13 @@
 // Helpers to define the VL patterns.
 //===----------------------------------------------------------------------===//
 
+def SDT_RISCVIntUnOp_VL : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
+                                               SDTCisSameAs<0, 2>,
+                                               SDTCisVec<0>, SDTCisInt<0>,
+                                               SDTCVecEltisVT<3, i1>,
+                                               SDTCisSameNumEltsAs<0, 3>,
+                                               SDTCisVT<4, XLenVT>]>;
+
 def SDT_RISCVIntBinOp_VL : SDTypeProfile<1, 5, [SDTCisSameAs<0, 1>,
                                                 SDTCisSameAs<0, 2>,
                                                 SDTCisVec<0>, SDTCisInt<0>,
@@ -96,6 +103,12 @@
 def riscv_umin_vl : SDNode<"RISCVISD::UMIN_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
 def riscv_umax_vl : SDNode<"RISCVISD::UMAX_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
 
+def riscv_bitreverse_vl : SDNode<"RISCVISD::BITREVERSE_VL", SDT_RISCVIntUnOp_VL>;
+def riscv_bswap_vl : SDNode<"RISCVISD::BSWAP_VL", SDT_RISCVIntUnOp_VL>;
+def riscv_ctlz_vl : SDNode<"RISCVISD::CTLZ_VL", SDT_RISCVIntUnOp_VL>;
+def riscv_cttz_vl : SDNode<"RISCVISD::CTTZ_VL", SDT_RISCVIntUnOp_VL>;
+def riscv_ctpop_vl : SDNode<"RISCVISD::CTPOP_VL", SDT_RISCVIntUnOp_VL>;
+
 def riscv_saddsat_vl : SDNode<"RISCVISD::SADDSAT_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
 def riscv_uaddsat_vl : SDNode<"RISCVISD::UADDSAT_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
 def riscv_ssubsat_vl : SDNode<"RISCVISD::SSUBSAT_VL", SDT_RISCVIntBinOp_VL>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
@@ -220,3 +220,32 @@
 defm : VPatUnarySDNode_V<ctlz, "PseudoVCLZ">;
 defm : VPatUnarySDNode_V<cttz, "PseudoVCTZ">;
 defm : VPatUnarySDNode_V<ctpop, "PseudoVCPOP">;
+
+//===----------------------------------------------------------------------===//
+// VL patterns
+//===----------------------------------------------------------------------===//
+
+multiclass VPatUnaryVL_V<SDPatternOperator op, string instruction_name> {
+  foreach vti = AllIntegerVectors in {
+    let Predicates = !listconcat([HasStdExtZvbb],
+                                 GetVTypePredicates<vti>.Predicates) in {
+      def : Pat<(vti.Vector (op (vti.Vector vti.RegClass:$rs1),
+                                (vti.Vector vti.RegClass:$merge),
+                                (vti.Mask V0),
+                                VLOpFrag)),
+                (!cast<Instruction>(instruction_name#"_V_"#vti.LMul.MX#"_MASK")
+                   vti.RegClass:$merge,
+                   vti.RegClass:$rs1,
+                   (vti.Mask V0),
+                   GPR:$vl,
+                   vti.Log2SEW,
+                   TAIL_AGNOSTIC)>;
+    }
+  }
+}
+
+defm : VPatUnaryVL_V<riscv_bitreverse_vl, "PseudoVBREV">;
+defm : VPatUnaryVL_V<riscv_bswap_vl, "PseudoVREV8">;
+defm : VPatUnaryVL_V<riscv_ctlz_vl, "PseudoVCLZ">;
+defm : VPatUnaryVL_V<riscv_cttz_vl, "PseudoVCTZ">;
+defm : VPatUnaryVL_V<riscv_ctpop_vl, "PseudoVCPOP">;
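The tests below cover each operation in a masked and an unmasked form; the unmasked variants splat i1 true through the usual insertelement/shufflevector idiom, and the all-ones mask is expected to be folded during selection, which is why their CHECK-ZVBB lines show the plain instruction without v0.t. A standalone sketch of that pattern, with a hypothetical function name:

declare <vscale x 1 x i8> @llvm.vp.bitreverse.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i1>, i32)

define <vscale x 1 x i8> @vp_brev_allones_demo(<vscale x 1 x i8> %va, i32 zeroext %evl) {
  ; Splat of i1 true: the canonical all-active VP mask.
  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
  ; Should select an unmasked vbrev.v under +experimental-zvbb.
  %v = call <vscale x 1 x i8> @llvm.vp.bitreverse.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i8> %v
}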
diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll
@@ -3,6 +3,10 @@
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=lp64d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvbb,+m -target-abi=ilp32d \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
+; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvbb,+m -target-abi=lp64d \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
 
 declare <vscale x 1 x i8> @llvm.vp.bitreverse.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i1>, i32)
 
@@ -28,6 +32,12 @@
 ; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
 ; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 1 x i8> @llvm.vp.bitreverse.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x i8> %v
 }
@@ -54,6 +64,12 @@
 ; CHECK-NEXT:    vadd.vv v8, v8, v8
 ; CHECK-NEXT:    vor.vv v8, v9, v8
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x i8> @llvm.vp.bitreverse.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 %evl)
@@ -84,6 +100,12 @@
 ; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
 ; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 2 x i8> @llvm.vp.bitreverse.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i8> %v
 }
@@ -110,6 +132,12 @@
 ; CHECK-NEXT:    vadd.vv v8, v8, v8
 ; CHECK-NEXT:    vor.vv v8, v9, v8
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x i8> @llvm.vp.bitreverse.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 %evl)
@@ -140,6 +168,12 @@
 ; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
 ; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 4 x i8> @llvm.vp.bitreverse.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x i8> %v
 }
@@ -166,6 +200,12 @@
 ; CHECK-NEXT:    vadd.vv v8, v8, v8
 ; CHECK-NEXT:    vor.vv v8, v9, v8
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x i8> @llvm.vp.bitreverse.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 %evl)
@@ -196,6 +236,12 @@
 ; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
 ; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 8 x i8> @llvm.vp.bitreverse.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i8> %v
 }
@@ -222,6 +268,12 @@
 ; CHECK-NEXT:    vadd.vv v8, v8, v8
 ; CHECK-NEXT:    vor.vv v8, v9, v8
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x i8> @llvm.vp.bitreverse.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 %evl)
@@ -252,6 +304,12 @@
 ; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
 ; CHECK-NEXT:    vor.vv v8, v10, v8, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 16 x i8> @llvm.vp.bitreverse.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x i8> %v
 }
@@ -278,6 +336,12 @@
 ; CHECK-NEXT:    vadd.vv v8, v8, v8
 ; CHECK-NEXT:    vor.vv v8, v10, v8
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
   %m =
shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.bitreverse.nxv16i8( %va, %m, i32 %evl) @@ -308,6 +372,12 @@ ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t ; CHECK-NEXT: vor.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bitreverse_nxv32i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.bitreverse.nxv32i8( %va, %m, i32 %evl) ret %v } @@ -334,6 +404,12 @@ ; CHECK-NEXT: vadd.vv v8, v8, v8 ; CHECK-NEXT: vor.vv v8, v12, v8 ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bitreverse_nxv32i8_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-ZVBB-NEXT: vbrev.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.bitreverse.nxv32i8( %va, %m, i32 %evl) @@ -364,6 +440,12 @@ ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t ; CHECK-NEXT: vor.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bitreverse_nxv64i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.bitreverse.nxv64i8( %va, %m, i32 %evl) ret %v } @@ -390,6 +472,12 @@ ; CHECK-NEXT: vadd.vv v8, v8, v8 ; CHECK-NEXT: vor.vv v8, v16, v8 ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bitreverse_nxv64i8_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-ZVBB-NEXT: vbrev.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.bitreverse.nxv64i8( %va, %m, i32 %evl) @@ -456,6 +544,12 @@ ; RV64-NEXT: vsll.vi v8, v8, 1, v0.t ; RV64-NEXT: vor.vv v8, v9, v8, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.bitreverse.nxv1i16( %va, %m, i32 %evl) ret %v } @@ -518,6 +612,12 @@ ; RV64-NEXT: vadd.vv v8, v8, v8 ; RV64-NEXT: vor.vv v8, v9, v8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i16_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-ZVBB-NEXT: vbrev.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.bitreverse.nxv1i16( %va, %m, i32 %evl) @@ -584,6 +684,12 @@ ; RV64-NEXT: vsll.vi v8, v8, 1, v0.t ; RV64-NEXT: vor.vv v8, v9, v8, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.bitreverse.nxv2i16( %va, %m, i32 %evl) ret %v } @@ -646,6 +752,12 @@ ; RV64-NEXT: vadd.vv v8, v8, v8 ; RV64-NEXT: vor.vv v8, v9, v8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i16_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-ZVBB-NEXT: vbrev.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.bitreverse.nxv2i16( %va, %m, i32 %evl) @@ -712,6 +824,12 @@ ; RV64-NEXT: vsll.vi v8, v8, 1, v0.t ; RV64-NEXT: vor.vv v8, v9, v8, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i16: +; 
CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.bitreverse.nxv4i16( %va, %m, i32 %evl) ret %v } @@ -774,6 +892,12 @@ ; RV64-NEXT: vadd.vv v8, v8, v8 ; RV64-NEXT: vor.vv v8, v9, v8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i16_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-ZVBB-NEXT: vbrev.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.bitreverse.nxv4i16( %va, %m, i32 %evl) @@ -840,6 +964,12 @@ ; RV64-NEXT: vsll.vi v8, v8, 1, v0.t ; RV64-NEXT: vor.vv v8, v10, v8, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.bitreverse.nxv8i16( %va, %m, i32 %evl) ret %v } @@ -902,6 +1032,12 @@ ; RV64-NEXT: vadd.vv v8, v8, v8 ; RV64-NEXT: vor.vv v8, v10, v8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i16_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-ZVBB-NEXT: vbrev.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.bitreverse.nxv8i16( %va, %m, i32 %evl) @@ -968,6 +1104,12 @@ ; RV64-NEXT: vsll.vi v8, v8, 1, v0.t ; RV64-NEXT: vor.vv v8, v12, v8, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.bitreverse.nxv16i16( %va, %m, i32 %evl) ret %v } @@ -1030,6 +1172,12 @@ ; RV64-NEXT: vadd.vv v8, v8, v8 ; RV64-NEXT: vor.vv v8, v12, v8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i16_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-ZVBB-NEXT: vbrev.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.bitreverse.nxv16i16( %va, %m, i32 %evl) @@ -1096,6 +1244,12 @@ ; RV64-NEXT: vsll.vi v8, v8, 1, v0.t ; RV64-NEXT: vor.vv v8, v16, v8, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bitreverse_nxv32i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.bitreverse.nxv32i16( %va, %m, i32 %evl) ret %v } @@ -1158,6 +1312,12 @@ ; RV64-NEXT: vadd.vv v8, v8, v8 ; RV64-NEXT: vor.vv v8, v16, v8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bitreverse_nxv32i16_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-ZVBB-NEXT: vbrev.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.bitreverse.nxv32i16( %va, %m, i32 %evl) @@ -1240,6 +1400,12 @@ ; RV64-NEXT: vsll.vi v8, v8, 1, v0.t ; RV64-NEXT: vor.vv v8, v9, v8, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.bitreverse.nxv1i32( %va, %m, i32 %evl) ret %v } @@ -1318,6 +1484,12 @@ ; RV64-NEXT: vadd.vv v8, v8, v8 ; 
RV64-NEXT: vor.vv v8, v9, v8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i32_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-ZVBB-NEXT: vbrev.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.bitreverse.nxv1i32( %va, %m, i32 %evl) @@ -1400,6 +1572,12 @@ ; RV64-NEXT: vsll.vi v8, v8, 1, v0.t ; RV64-NEXT: vor.vv v8, v9, v8, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.bitreverse.nxv2i32( %va, %m, i32 %evl) ret %v } @@ -1478,6 +1656,12 @@ ; RV64-NEXT: vadd.vv v8, v8, v8 ; RV64-NEXT: vor.vv v8, v9, v8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i32_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vbrev.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.bitreverse.nxv2i32( %va, %m, i32 %evl) @@ -1560,6 +1744,12 @@ ; RV64-NEXT: vsll.vi v8, v8, 1, v0.t ; RV64-NEXT: vor.vv v8, v10, v8, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.bitreverse.nxv4i32( %va, %m, i32 %evl) ret %v } @@ -1638,6 +1828,12 @@ ; RV64-NEXT: vadd.vv v8, v8, v8 ; RV64-NEXT: vor.vv v8, v10, v8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i32_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-ZVBB-NEXT: vbrev.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.bitreverse.nxv4i32( %va, %m, i32 %evl) @@ -1720,6 +1916,12 @@ ; RV64-NEXT: vsll.vi v8, v8, 1, v0.t ; RV64-NEXT: vor.vv v8, v12, v8, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.bitreverse.nxv8i32( %va, %m, i32 %evl) ret %v } @@ -1798,6 +2000,12 @@ ; RV64-NEXT: vadd.vv v8, v8, v8 ; RV64-NEXT: vor.vv v8, v12, v8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i32_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-ZVBB-NEXT: vbrev.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.bitreverse.nxv8i32( %va, %m, i32 %evl) @@ -1880,6 +2088,12 @@ ; RV64-NEXT: vsll.vi v8, v8, 1, v0.t ; RV64-NEXT: vor.vv v8, v16, v8, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.bitreverse.nxv16i32( %va, %m, i32 %evl) ret %v } @@ -1958,6 +2172,12 @@ ; RV64-NEXT: vadd.vv v8, v8, v8 ; RV64-NEXT: vor.vv v8, v16, v8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i32_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-ZVBB-NEXT: vbrev.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement 
poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.bitreverse.nxv16i32( %va, %m, i32 %evl) @@ -2106,6 +2326,12 @@ ; RV64-NEXT: vsll.vi v8, v8, 1, v0.t ; RV64-NEXT: vor.vv v8, v9, v8, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i64: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.bitreverse.nxv1i64( %va, %m, i32 %evl) ret %v } @@ -2250,6 +2476,12 @@ ; RV64-NEXT: vadd.vv v8, v8, v8 ; RV64-NEXT: vor.vv v8, v9, v8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i64_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-ZVBB-NEXT: vbrev.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.bitreverse.nxv1i64( %va, %m, i32 %evl) @@ -2398,6 +2630,12 @@ ; RV64-NEXT: vsll.vi v8, v8, 1, v0.t ; RV64-NEXT: vor.vv v8, v10, v8, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i64: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.bitreverse.nxv2i64( %va, %m, i32 %evl) ret %v } @@ -2542,6 +2780,12 @@ ; RV64-NEXT: vadd.vv v8, v8, v8 ; RV64-NEXT: vor.vv v8, v10, v8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i64_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vbrev.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.bitreverse.nxv2i64( %va, %m, i32 %evl) @@ -2690,6 +2934,12 @@ ; RV64-NEXT: vsll.vi v8, v8, 1, v0.t ; RV64-NEXT: vor.vv v8, v12, v8, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i64: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.bitreverse.nxv4i64( %va, %m, i32 %evl) ret %v } @@ -2834,6 +3084,12 @@ ; RV64-NEXT: vadd.vv v8, v8, v8 ; RV64-NEXT: vor.vv v8, v12, v8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i64_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-ZVBB-NEXT: vbrev.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.bitreverse.nxv4i64( %va, %m, i32 %evl) @@ -3046,6 +3302,12 @@ ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bitreverse_nxv7i64: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.bitreverse.nxv7i64( %va, %m, i32 %evl) ret %v } @@ -3204,6 +3466,12 @@ ; RV64-NEXT: vadd.vv v8, v8, v8 ; RV64-NEXT: vor.vv v8, v16, v8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bitreverse_nxv7i64_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-ZVBB-NEXT: vbrev.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.bitreverse.nxv7i64( %va, %m, i32 %evl) @@ -3416,6 +3684,12 @@ ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i64: +; 
CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.bitreverse.nxv8i64( %va, %m, i32 %evl) ret %v } @@ -3574,6 +3848,12 @@ ; RV64-NEXT: vadd.vv v8, v8, v8 ; RV64-NEXT: vor.vv v8, v16, v8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i64_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-ZVBB-NEXT: vbrev.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.bitreverse.nxv8i64( %va, %m, i32 %evl) @@ -3757,6 +4037,29 @@ ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bitreverse_nxv64i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vmv1r.v v24, v0 +; CHECK-ZVBB-NEXT: csrr a1, vlenb +; CHECK-ZVBB-NEXT: srli a2, a1, 1 +; CHECK-ZVBB-NEXT: vsetvli a3, zero, e8, m1, ta, ma +; CHECK-ZVBB-NEXT: vslidedown.vx v0, v0, a2 +; CHECK-ZVBB-NEXT: slli a1, a1, 2 +; CHECK-ZVBB-NEXT: sub a2, a0, a1 +; CHECK-ZVBB-NEXT: sltu a3, a0, a2 +; CHECK-ZVBB-NEXT: addi a3, a3, -1 +; CHECK-ZVBB-NEXT: and a2, a3, a2 +; CHECK-ZVBB-NEXT: vsetvli zero, a2, e16, m8, ta, ma +; CHECK-ZVBB-NEXT: vbrev.v v16, v16, v0.t +; CHECK-ZVBB-NEXT: bltu a0, a1, .LBB46_2 +; CHECK-ZVBB-NEXT: # %bb.1: +; CHECK-ZVBB-NEXT: mv a0, a1 +; CHECK-ZVBB-NEXT: .LBB46_2: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-ZVBB-NEXT: vmv1r.v v0, v24 +; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.bitreverse.nxv64i16( %va, %m, i32 %evl) ret %v } @@ -3877,6 +4180,24 @@ ; RV64-NEXT: vadd.vv v8, v8, v8 ; RV64-NEXT: vor.vv v8, v24, v8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bitreverse_nxv64i16_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: csrr a1, vlenb +; CHECK-ZVBB-NEXT: slli a1, a1, 2 +; CHECK-ZVBB-NEXT: sub a2, a0, a1 +; CHECK-ZVBB-NEXT: sltu a3, a0, a2 +; CHECK-ZVBB-NEXT: addi a3, a3, -1 +; CHECK-ZVBB-NEXT: and a2, a3, a2 +; CHECK-ZVBB-NEXT: vsetvli zero, a2, e16, m8, ta, ma +; CHECK-ZVBB-NEXT: vbrev.v v16, v16 +; CHECK-ZVBB-NEXT: bltu a0, a1, .LBB47_2 +; CHECK-ZVBB-NEXT: # %bb.1: +; CHECK-ZVBB-NEXT: mv a0, a1 +; CHECK-ZVBB-NEXT: .LBB47_2: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-ZVBB-NEXT: vbrev.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.bitreverse.nxv64i16( %va, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll @@ -3,6 +3,10 @@ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvbb,+m -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB +; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvbb,+m -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB declare @llvm.vp.bswap.nxv1i16(, , i32) @@ -14,6 +18,12 @@ ; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bswap_nxv1i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: 
vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-ZVBB-NEXT: vrev8.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.bswap.nxv1i16( %va, %m, i32 %evl) ret %v } @@ -26,6 +36,12 @@ ; CHECK-NEXT: vsll.vi v8, v8, 8 ; CHECK-NEXT: vor.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bswap_nxv1i16_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-ZVBB-NEXT: vrev8.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.bswap.nxv1i16( %va, %m, i32 %evl) @@ -42,6 +58,12 @@ ; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bswap_nxv2i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-ZVBB-NEXT: vrev8.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.bswap.nxv2i16( %va, %m, i32 %evl) ret %v } @@ -54,6 +76,12 @@ ; CHECK-NEXT: vsll.vi v8, v8, 8 ; CHECK-NEXT: vor.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bswap_nxv2i16_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-ZVBB-NEXT: vrev8.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.bswap.nxv2i16( %va, %m, i32 %evl) @@ -70,6 +98,12 @@ ; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bswap_nxv4i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-ZVBB-NEXT: vrev8.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.bswap.nxv4i16( %va, %m, i32 %evl) ret %v } @@ -82,6 +116,12 @@ ; CHECK-NEXT: vsll.vi v8, v8, 8 ; CHECK-NEXT: vor.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bswap_nxv4i16_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-ZVBB-NEXT: vrev8.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.bswap.nxv4i16( %va, %m, i32 %evl) @@ -98,6 +138,12 @@ ; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t ; CHECK-NEXT: vor.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bswap_nxv8i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-ZVBB-NEXT: vrev8.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.bswap.nxv8i16( %va, %m, i32 %evl) ret %v } @@ -110,6 +156,12 @@ ; CHECK-NEXT: vsll.vi v8, v8, 8 ; CHECK-NEXT: vor.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bswap_nxv8i16_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-ZVBB-NEXT: vrev8.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.bswap.nxv8i16( %va, %m, i32 %evl) @@ -126,6 +178,12 @@ ; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t ; CHECK-NEXT: vor.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bswap_nxv16i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-ZVBB-NEXT: vrev8.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.bswap.nxv16i16( %va, %m, i32 %evl) ret %v } @@ -138,6 +196,12 @@ ; CHECK-NEXT: vsll.vi v8, v8, 8 ; CHECK-NEXT: vor.vv v8, v8, v12 ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bswap_nxv16i16_unmasked: +; CHECK-ZVBB: # %bb.0: +; 
CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-ZVBB-NEXT: vrev8.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.bswap.nxv16i16( %va, %m, i32 %evl) @@ -154,6 +218,12 @@ ; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t ; CHECK-NEXT: vor.vv v8, v8, v16, v0.t ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bswap_nxv32i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-ZVBB-NEXT: vrev8.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.bswap.nxv32i16( %va, %m, i32 %evl) ret %v } @@ -166,6 +236,12 @@ ; CHECK-NEXT: vsll.vi v8, v8, 8 ; CHECK-NEXT: vor.vv v8, v8, v16 ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bswap_nxv32i16_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-ZVBB-NEXT: vrev8.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.bswap.nxv32i16( %va, %m, i32 %evl) @@ -206,6 +282,12 @@ ; RV64-NEXT: vor.vv v8, v8, v10, v0.t ; RV64-NEXT: vor.vv v8, v8, v9, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bswap_nxv1i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-ZVBB-NEXT: vrev8.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.bswap.nxv1i32( %va, %m, i32 %evl) ret %v } @@ -242,6 +324,12 @@ ; RV64-NEXT: vor.vv v8, v8, v10 ; RV64-NEXT: vor.vv v8, v8, v9 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bswap_nxv1i32_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-ZVBB-NEXT: vrev8.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.bswap.nxv1i32( %va, %m, i32 %evl) @@ -282,6 +370,12 @@ ; RV64-NEXT: vor.vv v8, v8, v10, v0.t ; RV64-NEXT: vor.vv v8, v8, v9, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bswap_nxv2i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vrev8.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.bswap.nxv2i32( %va, %m, i32 %evl) ret %v } @@ -318,6 +412,12 @@ ; RV64-NEXT: vor.vv v8, v8, v10 ; RV64-NEXT: vor.vv v8, v8, v9 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bswap_nxv2i32_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vrev8.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.bswap.nxv2i32( %va, %m, i32 %evl) @@ -358,6 +458,12 @@ ; RV64-NEXT: vor.vv v8, v8, v12, v0.t ; RV64-NEXT: vor.vv v8, v8, v10, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bswap_nxv4i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-ZVBB-NEXT: vrev8.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.bswap.nxv4i32( %va, %m, i32 %evl) ret %v } @@ -394,6 +500,12 @@ ; RV64-NEXT: vor.vv v8, v8, v12 ; RV64-NEXT: vor.vv v8, v8, v10 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bswap_nxv4i32_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-ZVBB-NEXT: vrev8.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.bswap.nxv4i32( %va, %m, i32 %evl) @@ -434,6 +546,12 @@ ; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: vor.vv v8, v8, v12, 
v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bswap_nxv8i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-ZVBB-NEXT: vrev8.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.bswap.nxv8i32( %va, %m, i32 %evl) ret %v } @@ -470,6 +588,12 @@ ; RV64-NEXT: vor.vv v8, v8, v16 ; RV64-NEXT: vor.vv v8, v8, v12 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bswap_nxv8i32_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-ZVBB-NEXT: vrev8.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.bswap.nxv8i32( %va, %m, i32 %evl) @@ -510,6 +634,12 @@ ; RV64-NEXT: vor.vv v8, v8, v24, v0.t ; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bswap_nxv16i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-ZVBB-NEXT: vrev8.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.bswap.nxv16i32( %va, %m, i32 %evl) ret %v } @@ -546,6 +676,12 @@ ; RV64-NEXT: vor.vv v8, v8, v24 ; RV64-NEXT: vor.vv v8, v8, v16 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bswap_nxv16i32_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-ZVBB-NEXT: vrev8.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.bswap.nxv16i32( %va, %m, i32 %evl) @@ -628,6 +764,12 @@ ; RV64-NEXT: vor.vv v8, v8, v10, v0.t ; RV64-NEXT: vor.vv v8, v9, v8, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bswap_nxv1i64: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-ZVBB-NEXT: vrev8.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.bswap.nxv1i64( %va, %m, i32 %evl) ret %v } @@ -706,6 +848,12 @@ ; RV64-NEXT: vor.vv v8, v8, v10 ; RV64-NEXT: vor.vv v8, v9, v8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bswap_nxv1i64_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-ZVBB-NEXT: vrev8.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.bswap.nxv1i64( %va, %m, i32 %evl) @@ -788,6 +936,12 @@ ; RV64-NEXT: vor.vv v8, v8, v12, v0.t ; RV64-NEXT: vor.vv v8, v10, v8, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bswap_nxv2i64: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vrev8.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.bswap.nxv2i64( %va, %m, i32 %evl) ret %v } @@ -866,6 +1020,12 @@ ; RV64-NEXT: vor.vv v8, v8, v12 ; RV64-NEXT: vor.vv v8, v10, v8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bswap_nxv2i64_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vrev8.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.bswap.nxv2i64( %va, %m, i32 %evl) @@ -948,6 +1108,12 @@ ; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: vor.vv v8, v12, v8, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bswap_nxv4i64: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-ZVBB-NEXT: vrev8.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.bswap.nxv4i64( %va, %m, i32 %evl) ret %v } @@ -1026,6 +1192,12 @@ ; RV64-NEXT: vor.vv v8, v8, v16 ; RV64-NEXT: vor.vv v8, v12, 
v8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bswap_nxv4i64_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-ZVBB-NEXT: vrev8.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.bswap.nxv4i64( %va, %m, i32 %evl) @@ -1172,6 +1344,12 @@ ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bswap_nxv7i64: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-ZVBB-NEXT: vrev8.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.bswap.nxv7i64( %va, %m, i32 %evl) ret %v } @@ -1263,6 +1441,12 @@ ; RV64-NEXT: vor.vv v8, v8, v24 ; RV64-NEXT: vor.vv v8, v16, v8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bswap_nxv7i64_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-ZVBB-NEXT: vrev8.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.bswap.nxv7i64( %va, %m, i32 %evl) @@ -1409,6 +1593,12 @@ ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bswap_nxv8i64: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-ZVBB-NEXT: vrev8.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.bswap.nxv8i64( %va, %m, i32 %evl) ret %v } @@ -1500,6 +1690,12 @@ ; RV64-NEXT: vor.vv v8, v8, v24 ; RV64-NEXT: vor.vv v8, v16, v8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bswap_nxv8i64_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-ZVBB-NEXT: vrev8.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.bswap.nxv8i64( %va, %m, i32 %evl) @@ -1560,6 +1756,29 @@ ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bswap_nxv64i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vmv1r.v v24, v0 +; CHECK-ZVBB-NEXT: csrr a1, vlenb +; CHECK-ZVBB-NEXT: srli a2, a1, 1 +; CHECK-ZVBB-NEXT: vsetvli a3, zero, e8, m1, ta, ma +; CHECK-ZVBB-NEXT: vslidedown.vx v0, v0, a2 +; CHECK-ZVBB-NEXT: slli a1, a1, 2 +; CHECK-ZVBB-NEXT: sub a2, a0, a1 +; CHECK-ZVBB-NEXT: sltu a3, a0, a2 +; CHECK-ZVBB-NEXT: addi a3, a3, -1 +; CHECK-ZVBB-NEXT: and a2, a3, a2 +; CHECK-ZVBB-NEXT: vsetvli zero, a2, e16, m8, ta, ma +; CHECK-ZVBB-NEXT: vrev8.v v16, v16, v0.t +; CHECK-ZVBB-NEXT: bltu a0, a1, .LBB32_2 +; CHECK-ZVBB-NEXT: # %bb.1: +; CHECK-ZVBB-NEXT: mv a0, a1 +; CHECK-ZVBB-NEXT: .LBB32_2: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-ZVBB-NEXT: vmv1r.v v0, v24 +; CHECK-ZVBB-NEXT: vrev8.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.bswap.nxv64i16( %va, %m, i32 %evl) ret %v } @@ -1586,6 +1805,24 @@ ; CHECK-NEXT: vsll.vi v8, v8, 8 ; CHECK-NEXT: vor.vv v8, v8, v24 ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_bswap_nxv64i16_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: csrr a1, vlenb +; CHECK-ZVBB-NEXT: slli a1, a1, 2 +; CHECK-ZVBB-NEXT: sub a2, a0, a1 +; CHECK-ZVBB-NEXT: sltu a3, a0, a2 +; CHECK-ZVBB-NEXT: addi a3, a3, -1 +; CHECK-ZVBB-NEXT: and a2, a3, a2 +; CHECK-ZVBB-NEXT: vsetvli zero, a2, e16, m8, ta, ma +; CHECK-ZVBB-NEXT: vrev8.v v16, v16 +; CHECK-ZVBB-NEXT: bltu a0, a1, .LBB33_2 +; CHECK-ZVBB-NEXT: # %bb.1: +; CHECK-ZVBB-NEXT: mv a0, a1 +; CHECK-ZVBB-NEXT: .LBB33_2: +; CHECK-ZVBB-NEXT: 
vsetvli zero, a0, e16, m8, ta, ma +; CHECK-ZVBB-NEXT: vrev8.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.bswap.nxv64i16( %va, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll @@ -3,6 +3,10 @@ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvbb,+m -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB +; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvbb,+m -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB declare @llvm.vp.ctlz.nxv1i8(, i1 immarg, , i32) @@ -30,6 +34,12 @@ ; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv1i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv1i8( %va, i1 false, %m, i32 %evl) ret %v } @@ -58,6 +68,12 @@ ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: vand.vi v8, v8, 15 ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv1i8_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv1i8( %va, i1 false, %m, i32 %evl) @@ -90,6 +106,12 @@ ; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv2i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv2i8( %va, i1 false, %m, i32 %evl) ret %v } @@ -118,6 +140,12 @@ ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: vand.vi v8, v8, 15 ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv2i8_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv2i8( %va, i1 false, %m, i32 %evl) @@ -150,6 +178,12 @@ ; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv4i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv4i8( %va, i1 false, %m, i32 %evl) ret %v } @@ -178,6 +212,12 @@ ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: vand.vi v8, v8, 15 ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv4i8_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv4i8( %va, i1 false, %m, i32 %evl) @@ -210,6 +250,12 @@ ; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t ; 
CHECK-NEXT: vand.vi v8, v8, 15, v0.t ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv8i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv8i8( %va, i1 false, %m, i32 %evl) ret %v } @@ -238,6 +284,12 @@ ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: vand.vi v8, v8, 15 ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv8i8_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv8i8( %va, i1 false, %m, i32 %evl) @@ -270,6 +322,12 @@ ; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv16i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv16i8( %va, i1 false, %m, i32 %evl) ret %v } @@ -298,6 +356,12 @@ ; CHECK-NEXT: vadd.vv v8, v8, v10 ; CHECK-NEXT: vand.vi v8, v8, 15 ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv16i8_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv16i8( %va, i1 false, %m, i32 %evl) @@ -330,6 +394,12 @@ ; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv32i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv32i8( %va, i1 false, %m, i32 %evl) ret %v } @@ -358,6 +428,12 @@ ; CHECK-NEXT: vadd.vv v8, v8, v12 ; CHECK-NEXT: vand.vi v8, v8, 15 ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv32i8_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv32i8( %va, i1 false, %m, i32 %evl) @@ -390,6 +466,12 @@ ; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv64i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv64i8( %va, i1 false, %m, i32 %evl) ret %v } @@ -418,6 +500,12 @@ ; CHECK-NEXT: vadd.vv v8, v8, v16 ; CHECK-NEXT: vand.vi v8, v8, 15 ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv64i8_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv64i8( %va, i1 false, %m, i32 %evl) @@ -492,6 +580,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv1i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv1i16( %va, i1 false, 
%m, i32 %evl) ret %v } @@ -562,6 +656,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv1i16_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv1i16( %va, i1 false, %m, i32 %evl) @@ -636,6 +736,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv2i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv2i16( %va, i1 false, %m, i32 %evl) ret %v } @@ -706,6 +812,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv2i16_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv2i16( %va, i1 false, %m, i32 %evl) @@ -780,6 +892,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv4i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv4i16( %va, i1 false, %m, i32 %evl) ret %v } @@ -850,6 +968,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv4i16_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv4i16( %va, i1 false, %m, i32 %evl) @@ -924,6 +1048,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv8i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv8i16( %va, i1 false, %m, i32 %evl) ret %v } @@ -994,6 +1124,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv8i16_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv8i16( %va, i1 false, %m, i32 %evl) @@ -1068,6 +1204,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv16i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv16i16( %va, i1 false, %m, i32 %evl) ret %v } @@ -1138,6 +1280,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv16i16_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; 
CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv16i16( %va, i1 false, %m, i32 %evl) @@ -1212,6 +1360,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv32i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv32i16( %va, i1 false, %m, i32 %evl) ret %v } @@ -1282,6 +1436,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv32i16_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv32i16( %va, i1 false, %m, i32 %evl) @@ -1362,6 +1522,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv1i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv1i32( %va, i1 false, %m, i32 %evl) ret %v } @@ -1438,6 +1604,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 24 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv1i32_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv1i32( %va, i1 false, %m, i32 %evl) @@ -1518,6 +1690,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv2i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv2i32( %va, i1 false, %m, i32 %evl) ret %v } @@ -1594,6 +1772,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 24 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv2i32_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv2i32( %va, i1 false, %m, i32 %evl) @@ -1674,6 +1858,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv4i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv4i32( %va, i1 false, %m, i32 %evl) ret %v } @@ -1750,6 +1940,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 24 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv4i32_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv4i32( %va, i1 false, %m, i32 %evl) @@ -1830,6 +2026,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t ; 
RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv8i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv8i32( %va, i1 false, %m, i32 %evl) ret %v } @@ -1906,6 +2108,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 24 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv8i32_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv8i32( %va, i1 false, %m, i32 %evl) @@ -1986,6 +2194,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv16i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv16i32( %va, i1 false, %m, i32 %evl) ret %v } @@ -2062,6 +2276,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 24 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv16i32_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv16i32( %va, i1 false, %m, i32 %evl) @@ -2185,6 +2405,12 @@ ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv1i64: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv1i64( %va, i1 false, %m, i32 %evl) ret %v } @@ -2304,6 +2530,12 @@ ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv1i64_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv1i64( %va, i1 false, %m, i32 %evl) @@ -2427,6 +2659,12 @@ ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv2i64: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv2i64( %va, i1 false, %m, i32 %evl) ret %v } @@ -2546,6 +2784,12 @@ ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv2i64_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv2i64( %va, i1 false, %m, i32 %evl) @@ -2669,6 +2913,12 @@ ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv4i64: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv4i64( %va, i1 false, %m, i32 %evl) ret %v } @@ -2788,6 +3038,12 @@ ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, 
a0 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv4i64_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv4i64( %va, i1 false, %m, i32 %evl) @@ -2911,6 +3167,12 @@ ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv7i64: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv7i64( %va, i1 false, %m, i32 %evl) ret %v } @@ -3030,6 +3292,12 @@ ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv7i64_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv7i64( %va, i1 false, %m, i32 %evl) @@ -3153,6 +3421,12 @@ ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv8i64: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv8i64( %va, i1 false, %m, i32 %evl) ret %v } @@ -3272,6 +3546,12 @@ ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv8i64_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv8i64( %va, i1 false, %m, i32 %evl) @@ -3614,6 +3894,28 @@ ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv16i64: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vmv1r.v v24, v0 +; CHECK-ZVBB-NEXT: csrr a1, vlenb +; CHECK-ZVBB-NEXT: srli a2, a1, 3 +; CHECK-ZVBB-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; CHECK-ZVBB-NEXT: vslidedown.vx v0, v0, a2 +; CHECK-ZVBB-NEXT: sub a2, a0, a1 +; CHECK-ZVBB-NEXT: sltu a3, a0, a2 +; CHECK-ZVBB-NEXT: addi a3, a3, -1 +; CHECK-ZVBB-NEXT: and a2, a3, a2 +; CHECK-ZVBB-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v16, v16, v0.t +; CHECK-ZVBB-NEXT: bltu a0, a1, .LBB46_2 +; CHECK-ZVBB-NEXT: # %bb.1: +; CHECK-ZVBB-NEXT: mv a0, a1 +; CHECK-ZVBB-NEXT: .LBB46_2: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-ZVBB-NEXT: vmv1r.v v0, v24 +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv16i64( %va, i1 false, %m, i32 %evl) ret %v } @@ -3846,6 +4148,23 @@ ; RV64-NEXT: vmul.vx v8, v8, a6 ; RV64-NEXT: vsrl.vx v8, v8, a7 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_nxv16i64_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: csrr a1, vlenb +; CHECK-ZVBB-NEXT: sub a2, a0, a1 +; CHECK-ZVBB-NEXT: sltu a3, a0, a2 +; CHECK-ZVBB-NEXT: addi a3, a3, -1 +; CHECK-ZVBB-NEXT: and a2, a3, a2 +; CHECK-ZVBB-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v16, v16 +; CHECK-ZVBB-NEXT: bltu a0, a1, .LBB47_2 +; CHECK-ZVBB-NEXT: # %bb.1: +; CHECK-ZVBB-NEXT: mv a0, a1 +; CHECK-ZVBB-NEXT: .LBB47_2: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; 
CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv16i64( %va, i1 false, %m, i32 %evl) @@ -3876,6 +4195,12 @@ ; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv1i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv1i8( %va, i1 true, %m, i32 %evl) ret %v } @@ -3904,6 +4229,12 @@ ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: vand.vi v8, v8, 15 ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv1i8_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv1i8( %va, i1 true, %m, i32 %evl) @@ -3935,6 +4266,12 @@ ; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv2i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv2i8( %va, i1 true, %m, i32 %evl) ret %v } @@ -3963,6 +4300,12 @@ ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: vand.vi v8, v8, 15 ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv2i8_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv2i8( %va, i1 true, %m, i32 %evl) @@ -3994,6 +4337,12 @@ ; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv4i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv4i8( %va, i1 true, %m, i32 %evl) ret %v } @@ -4022,6 +4371,12 @@ ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: vand.vi v8, v8, 15 ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv4i8_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv4i8( %va, i1 true, %m, i32 %evl) @@ -4053,6 +4408,12 @@ ; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv8i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv8i8( %va, i1 true, %m, i32 %evl) ret %v } @@ -4081,6 +4442,12 @@ ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: vand.vi v8, v8, 15 ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv8i8_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv8i8( %va, i1 true, 
%m, i32 %evl) @@ -4112,6 +4479,12 @@ ; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv16i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv16i8( %va, i1 true, %m, i32 %evl) ret %v } @@ -4140,6 +4513,12 @@ ; CHECK-NEXT: vadd.vv v8, v8, v10 ; CHECK-NEXT: vand.vi v8, v8, 15 ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv16i8_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv16i8( %va, i1 true, %m, i32 %evl) @@ -4171,6 +4550,12 @@ ; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv32i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv32i8( %va, i1 true, %m, i32 %evl) ret %v } @@ -4199,6 +4584,12 @@ ; CHECK-NEXT: vadd.vv v8, v8, v12 ; CHECK-NEXT: vand.vi v8, v8, 15 ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv32i8_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv32i8( %va, i1 true, %m, i32 %evl) @@ -4230,6 +4621,12 @@ ; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv64i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv64i8( %va, i1 true, %m, i32 %evl) ret %v } @@ -4258,6 +4655,12 @@ ; CHECK-NEXT: vadd.vv v8, v8, v16 ; CHECK-NEXT: vand.vi v8, v8, 15 ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv64i8_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv64i8( %va, i1 true, %m, i32 %evl) @@ -4331,6 +4734,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv1i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv1i16( %va, i1 true, %m, i32 %evl) ret %v } @@ -4401,6 +4810,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv1i16_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv1i16( %va, i1 true, %m, i32 %evl) @@ -4474,6 +4889,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv2i16: +; 
CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv2i16( %va, i1 true, %m, i32 %evl) ret %v } @@ -4544,6 +4965,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv2i16_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv2i16( %va, i1 true, %m, i32 %evl) @@ -4617,6 +5044,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv4i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv4i16( %va, i1 true, %m, i32 %evl) ret %v } @@ -4687,6 +5120,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv4i16_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv4i16( %va, i1 true, %m, i32 %evl) @@ -4760,6 +5199,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv8i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv8i16( %va, i1 true, %m, i32 %evl) ret %v } @@ -4830,6 +5275,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv8i16_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv8i16( %va, i1 true, %m, i32 %evl) @@ -4903,6 +5354,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv16i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv16i16( %va, i1 true, %m, i32 %evl) ret %v } @@ -4973,6 +5430,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv16i16_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv16i16( %va, i1 true, %m, i32 %evl) @@ -5046,6 +5509,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv32i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv32i16( %va, i1 true, %m, i32 %evl) ret %v 
} @@ -5116,6 +5585,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv32i16_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv32i16( %va, i1 true, %m, i32 %evl) @@ -5195,6 +5670,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv1i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv1i32( %va, i1 true, %m, i32 %evl) ret %v } @@ -5271,6 +5752,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 24 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv1i32_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv1i32( %va, i1 true, %m, i32 %evl) @@ -5350,6 +5837,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv2i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv2i32( %va, i1 true, %m, i32 %evl) ret %v } @@ -5426,6 +5919,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 24 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv2i32_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv2i32( %va, i1 true, %m, i32 %evl) @@ -5505,6 +6004,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv4i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv4i32( %va, i1 true, %m, i32 %evl) ret %v } @@ -5581,6 +6086,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 24 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv4i32_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv4i32( %va, i1 true, %m, i32 %evl) @@ -5660,6 +6171,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv8i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv8i32( %va, i1 true, %m, i32 %evl) ret %v } @@ -5736,6 +6253,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 24 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv8i32_unmasked: +; CHECK-ZVBB: # %bb.0: +; 
CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv8i32( %va, i1 true, %m, i32 %evl) @@ -5815,6 +6338,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv16i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv16i32( %va, i1 true, %m, i32 %evl) ret %v } @@ -5891,6 +6420,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 24 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv16i32_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv16i32( %va, i1 true, %m, i32 %evl) @@ -6013,6 +6548,12 @@ ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv1i64: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv1i64( %va, i1 true, %m, i32 %evl) ret %v } @@ -6132,6 +6673,12 @@ ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv1i64_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv1i64( %va, i1 true, %m, i32 %evl) @@ -6254,6 +6801,12 @@ ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv2i64: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv2i64( %va, i1 true, %m, i32 %evl) ret %v } @@ -6373,6 +6926,12 @@ ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv2i64_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv2i64( %va, i1 true, %m, i32 %evl) @@ -6495,6 +7054,12 @@ ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv4i64: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv4i64( %va, i1 true, %m, i32 %evl) ret %v } @@ -6614,6 +7179,12 @@ ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv4i64_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-ZVBB-NEXT: vclz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctlz.nxv4i64( %va, i1 true, %m, i32 %evl) @@ 
-6736,6 +7307,12 @@
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv7i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 7 x i64> @llvm.vp.ctlz.nxv7i64(<vscale x 7 x i64> %va, i1 true, <vscale x 7 x i1> %m, i32 %evl)
   ret <vscale x 7 x i64> %v
 }
@@ -6855,6 +7432,12 @@
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv7i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 7 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 7 x i1> %head, <vscale x 7 x i1> poison, <vscale x 7 x i32> zeroinitializer
   %v = call <vscale x 7 x i64> @llvm.vp.ctlz.nxv7i64(<vscale x 7 x i64> %va, i1 true, <vscale x 7 x i1> %m, i32 %evl)
@@ -6977,6 +7560,12 @@
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv8i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 8 x i64> @llvm.vp.ctlz.nxv8i64(<vscale x 8 x i64> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i64> %v
 }
@@ -7096,6 +7685,12 @@
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv8i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x i64> @llvm.vp.ctlz.nxv8i64(<vscale x 8 x i64> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
@@ -7436,6 +8031,28 @@
 ; RV64-NEXT:    add sp, sp, a0
 ; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv16i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vmv1r.v v24, v0
+; CHECK-ZVBB-NEXT:    csrr a1, vlenb
+; CHECK-ZVBB-NEXT:    srli a2, a1, 3
+; CHECK-ZVBB-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
+; CHECK-ZVBB-NEXT:    vslidedown.vx v0, v0, a2
+; CHECK-ZVBB-NEXT:    sub a2, a0, a1
+; CHECK-ZVBB-NEXT:    sltu a3, a0, a2
+; CHECK-ZVBB-NEXT:    addi a3, a3, -1
+; CHECK-ZVBB-NEXT:    and a2, a3, a2
+; CHECK-ZVBB-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v16, v16, v0.t
+; CHECK-ZVBB-NEXT:    bltu a0, a1, .LBB94_2
+; CHECK-ZVBB-NEXT:  # %bb.1:
+; CHECK-ZVBB-NEXT:    mv a0, a1
+; CHECK-ZVBB-NEXT:  .LBB94_2:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vmv1r.v v0, v24
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 16 x i64> @llvm.vp.ctlz.nxv16i64(<vscale x 16 x i64> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x i64> %v
 }
@@ -7668,6 +8285,23 @@
 ; RV64-NEXT:    vmul.vx v8, v8, a6
 ; RV64-NEXT:    vsrl.vx v8, v8, a7
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv16i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    csrr a1, vlenb
+; CHECK-ZVBB-NEXT:    sub a2, a0, a1
+; CHECK-ZVBB-NEXT:    sltu a3, a0, a2
+; CHECK-ZVBB-NEXT:    addi a3, a3, -1
+; CHECK-ZVBB-NEXT:    and a2, a3, a2
+; CHECK-ZVBB-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v16, v16
+; CHECK-ZVBB-NEXT:    bltu a0, a1, .LBB95_2
+; CHECK-ZVBB-NEXT:  # %bb.1:
+; CHECK-ZVBB-NEXT:    mv a0, a1
+; CHECK-ZVBB-NEXT:  .LBB95_2:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vclz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
   %v = call <vscale x 16 x i64> @llvm.vp.ctlz.nxv16i64(<vscale x 16 x i64> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
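A note on the generated checks above: every Zvbb variant has the same shape, one vsetvli to install EVL/SEW followed by a single vclz.v (with v0.t in the masked case), and the zero-undef tests check the identical sequence because vclz.v has defined results for zero inputs. The sketch below is illustrative only; the file name, function name, and invocation are mine and are not part of this patch. It is a minimal reproducer in the same style as the tests:

; repro.ll, illustrative only; assumed invocation:
;   llc -mtriple=riscv64 -mattr=+v,+experimental-zvbb -verify-machineinstrs < repro.ll
declare <vscale x 2 x i32> @llvm.vp.ctlz.nxv2i32(<vscale x 2 x i32>, i1 immarg, <vscale x 2 x i1>, i32)

define <vscale x 2 x i32> @repro_ctlz(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
  ; expected Zvbb lowering: vsetvli zero, a0, e32, m1, ta, ma / vclz.v v8, v8, v0.t
  %v = call <vscale x 2 x i32> @llvm.vp.ctlz.nxv2i32(<vscale x 2 x i32> %va, i1 false, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i32> %v
}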
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll
@@ -3,6 +3,10 @@
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=lp64d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvbb,+m -target-abi=ilp32d \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
+; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvbb,+m -target-abi=lp64d \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
 
 declare <vscale x 1 x i8> @llvm.vp.ctpop.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i1>, i32)
@@ -23,6 +27,12 @@
 ; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 1 x i8> @llvm.vp.ctpop.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x i8> %v
 }
@@ -44,6 +54,12 @@
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    vand.vi v8, v8, 15
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x i8> @llvm.vp.ctpop.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 %evl)
@@ -69,6 +85,12 @@
 ; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 2 x i8> @llvm.vp.ctpop.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i8> %v
 }
@@ -90,6 +112,12 @@
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    vand.vi v8, v8, 15
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x i8> @llvm.vp.ctpop.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 %evl)
@@ -115,6 +143,12 @@
 ; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 4 x i8> @llvm.vp.ctpop.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x i8> %v
 }
@@ -136,6 +170,12 @@
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    vand.vi v8, v8, 15
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x i8> @llvm.vp.ctpop.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 %evl)
@@ -161,6 +201,12 @@
 ; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 8 x i8> @llvm.vp.ctpop.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i8> %v
 }
@@ -182,6 +228,12 @@
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    vand.vi v8, v8, 15
 ;
CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i8_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctpop.nxv8i8( %va, %m, i32 %evl) @@ -207,6 +259,12 @@ ; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctpop.nxv16i8( %va, %m, i32 %evl) ret %v } @@ -228,6 +286,12 @@ ; CHECK-NEXT: vadd.vv v8, v8, v10 ; CHECK-NEXT: vand.vi v8, v8, 15 ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i8_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctpop.nxv16i8( %va, %m, i32 %evl) @@ -253,6 +317,12 @@ ; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctpop_nxv32i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctpop.nxv32i8( %va, %m, i32 %evl) ret %v } @@ -274,6 +344,12 @@ ; CHECK-NEXT: vadd.vv v8, v8, v12 ; CHECK-NEXT: vand.vi v8, v8, 15 ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctpop_nxv32i8_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctpop.nxv32i8( %va, %m, i32 %evl) @@ -299,6 +375,12 @@ ; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctpop_nxv64i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctpop.nxv64i8( %va, %m, i32 %evl) ret %v } @@ -320,6 +402,12 @@ ; CHECK-NEXT: vadd.vv v8, v8, v16 ; CHECK-NEXT: vand.vi v8, v8, 15 ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctpop_nxv64i8_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctpop.nxv64i8( %va, %m, i32 %evl) @@ -376,6 +464,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctpop.nxv1i16( %va, %m, i32 %evl) ret %v } @@ -428,6 +522,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i16_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctpop.nxv1i16( %va, %m, 
i32 %evl) @@ -484,6 +584,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctpop.nxv2i16( %va, %m, i32 %evl) ret %v } @@ -536,6 +642,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i16_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctpop.nxv2i16( %va, %m, i32 %evl) @@ -592,6 +704,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctpop.nxv4i16( %va, %m, i32 %evl) ret %v } @@ -644,6 +762,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i16_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctpop.nxv4i16( %va, %m, i32 %evl) @@ -700,6 +824,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctpop.nxv8i16( %va, %m, i32 %evl) ret %v } @@ -752,6 +882,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i16_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctpop.nxv8i16( %va, %m, i32 %evl) @@ -808,6 +944,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctpop.nxv16i16( %va, %m, i32 %evl) ret %v } @@ -860,6 +1002,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i16_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctpop.nxv16i16( %va, %m, i32 %evl) @@ -916,6 +1064,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctpop_nxv32i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctpop.nxv32i16( %va, %m, i32 
%evl) ret %v } @@ -968,6 +1122,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctpop_nxv32i16_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctpop.nxv32i16( %va, %m, i32 %evl) @@ -1026,6 +1186,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctpop.nxv1i32( %va, %m, i32 %evl) ret %v } @@ -1080,6 +1246,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 24 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i32_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctpop.nxv1i32( %va, %m, i32 %evl) @@ -1138,6 +1310,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctpop.nxv2i32( %va, %m, i32 %evl) ret %v } @@ -1192,6 +1370,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 24 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i32_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctpop.nxv2i32( %va, %m, i32 %evl) @@ -1250,6 +1434,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctpop.nxv4i32( %va, %m, i32 %evl) ret %v } @@ -1304,6 +1494,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 24 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i32_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctpop.nxv4i32( %va, %m, i32 %evl) @@ -1362,6 +1558,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctpop.nxv8i32( %va, %m, i32 %evl) ret %v } @@ -1416,6 +1618,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 24 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i32_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 
true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctpop.nxv8i32( %va, %m, i32 %evl) @@ -1474,6 +1682,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctpop.nxv16i32( %va, %m, i32 %evl) ret %v } @@ -1528,6 +1742,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 24 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i32_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctpop.nxv16i32( %va, %m, i32 %evl) @@ -1623,6 +1843,12 @@ ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i64: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctpop.nxv1i64( %va, %m, i32 %evl) ret %v } @@ -1714,6 +1940,12 @@ ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i64_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctpop.nxv1i64( %va, %m, i32 %evl) @@ -1809,6 +2041,12 @@ ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i64: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctpop.nxv2i64( %va, %m, i32 %evl) ret %v } @@ -1900,6 +2138,12 @@ ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i64_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctpop.nxv2i64( %va, %m, i32 %evl) @@ -1995,6 +2239,12 @@ ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i64: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctpop.nxv4i64( %va, %m, i32 %evl) ret %v } @@ -2086,6 +2336,12 @@ ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i64_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.ctpop.nxv4i64( %va, %m, i32 %evl) @@ -2181,6 +2437,12 @@ ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_ctpop_nxv7i64: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call 
<vscale x 7 x i64> @llvm.vp.ctpop.nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 %evl)
   ret <vscale x 7 x i64> %v
 }
@@ -2272,6 +2534,12 @@
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv7i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 7 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 7 x i1> %head, <vscale x 7 x i1> poison, <vscale x 7 x i32> zeroinitializer
   %v = call <vscale x 7 x i64> @llvm.vp.ctpop.nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 %evl)
@@ -2367,6 +2635,12 @@
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 8 x i64> @llvm.vp.ctpop.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i64> %v
 }
@@ -2458,6 +2732,12 @@
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x i64> @llvm.vp.ctpop.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 %evl)
@@ -2771,6 +3051,28 @@
 ; RV64-NEXT:    add sp, sp, a0
 ; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vmv1r.v v24, v0
+; CHECK-ZVBB-NEXT:    csrr a1, vlenb
+; CHECK-ZVBB-NEXT:    srli a2, a1, 3
+; CHECK-ZVBB-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
+; CHECK-ZVBB-NEXT:    vslidedown.vx v0, v0, a2
+; CHECK-ZVBB-NEXT:    sub a2, a0, a1
+; CHECK-ZVBB-NEXT:    sltu a3, a0, a2
+; CHECK-ZVBB-NEXT:    addi a3, a3, -1
+; CHECK-ZVBB-NEXT:    and a2, a3, a2
+; CHECK-ZVBB-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v16, v16, v0.t
+; CHECK-ZVBB-NEXT:    bltu a0, a1, .LBB46_2
+; CHECK-ZVBB-NEXT:  # %bb.1:
+; CHECK-ZVBB-NEXT:    mv a0, a1
+; CHECK-ZVBB-NEXT:  .LBB46_2:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vmv1r.v v0, v24
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 16 x i64> @llvm.vp.ctpop.nxv16i64(<vscale x 16 x i64> %va, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x i64> %v
 }
@@ -2963,6 +3265,23 @@
 ; RV64-NEXT:    vmul.vx v16, v16, a5
 ; RV64-NEXT:    vsrl.vx v16, v16, a6
 ; RV64-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i64_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    csrr a1, vlenb
+; CHECK-ZVBB-NEXT:    sub a2, a0, a1
+; CHECK-ZVBB-NEXT:    sltu a3, a0, a2
+; CHECK-ZVBB-NEXT:    addi a3, a3, -1
+; CHECK-ZVBB-NEXT:    and a2, a3, a2
+; CHECK-ZVBB-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v16, v16
+; CHECK-ZVBB-NEXT:    bltu a0, a1, .LBB47_2
+; CHECK-ZVBB-NEXT:  # %bb.1:
+; CHECK-ZVBB-NEXT:    mv a0, a1
+; CHECK-ZVBB-NEXT:  .LBB47_2:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
   %v = call <vscale x 16 x i64> @llvm.vp.ctpop.nxv16i64(<vscale x 16 x i64> %va, <vscale x 16 x i1> %m, i32 %evl)
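Same idea for ctpop: unlike vp.ctlz and vp.cttz there is no i1 immarg zero-undef operand in the intrinsic, so the masked and unmasked variants differ only in whether vcpop.v carries v0.t (this is the Zvbb vector instruction vcpop.v, distinct from the mask-reduction vcpop.m). An illustrative reproducer, under the same assumptions as the ctlz sketch above and likewise not part of this patch:

declare <vscale x 4 x i16> @llvm.vp.ctpop.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i32)

define <vscale x 4 x i16> @repro_ctpop(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
  ; expected Zvbb lowering: vsetvli zero, a0, e16, m1, ta, ma / vcpop.v v8, v8, v0.t
  %v = call <vscale x 4 x i16> @llvm.vp.ctpop.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i16> %v
}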
diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
@@ -3,6 +3,10 @@
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=lp64d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvbb,+m -target-abi=ilp32d \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
+; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvbb,+m -target-abi=lp64d \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
 
 declare <vscale x 1 x i8> @llvm.vp.cttz.nxv1i8(<vscale x 1 x i8>, i1 immarg, <vscale x 1 x i1>, i32)
@@ -27,6 +31,12 @@
 ; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv1i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 1 x i8> @llvm.vp.cttz.nxv1i8(<vscale x 1 x i8> %va, i1 false, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x i8> %v
 }
@@ -52,6 +62,12 @@
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    vand.vi v8, v8, 15
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv1i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x i8> @llvm.vp.cttz.nxv1i8(<vscale x 1 x i8> %va, i1 false, <vscale x 1 x i1> %m, i32 %evl)
@@ -81,6 +97,12 @@
 ; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv2i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 2 x i8> @llvm.vp.cttz.nxv2i8(<vscale x 2 x i8> %va, i1 false, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i8> %v
 }
@@ -106,6 +128,12 @@
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    vand.vi v8, v8, 15
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv2i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x i8> @llvm.vp.cttz.nxv2i8(<vscale x 2 x i8> %va, i1 false, <vscale x 2 x i1> %m, i32 %evl)
@@ -135,6 +163,12 @@
 ; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv4i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 4 x i8> @llvm.vp.cttz.nxv4i8(<vscale x 4 x i8> %va, i1 false, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x i8> %v
 }
@@ -160,6 +194,12 @@
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    vand.vi v8, v8, 15
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv4i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x i8> @llvm.vp.cttz.nxv4i8(<vscale x 4 x i8> %va, i1 false, <vscale x 4 x i1> %m, i32 %evl)
@@ -189,6 +229,12 @@
 ; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv8i8:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT:    ret
   %v = call <vscale x 8 x i8> @llvm.vp.cttz.nxv8i8(<vscale x 8 x i8> %va, i1 false, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i8> %v
 }
@@ -214,6 +260,12 @@
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    vand.vi v8, v8, 15
 ; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv8i8_unmasked:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vctz.v v8, v8
+; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
   %v = call
@llvm.vp.cttz.nxv8i8( %va, i1 false, %m, i32 %evl) @@ -243,6 +295,12 @@ ; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_cttz_nxv16i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.cttz.nxv16i8( %va, i1 false, %m, i32 %evl) ret %v } @@ -268,6 +326,12 @@ ; CHECK-NEXT: vadd.vv v8, v8, v10 ; CHECK-NEXT: vand.vi v8, v8, 15 ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_cttz_nxv16i8_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-ZVBB-NEXT: vctz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.cttz.nxv16i8( %va, i1 false, %m, i32 %evl) @@ -297,6 +361,12 @@ ; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_cttz_nxv32i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.cttz.nxv32i8( %va, i1 false, %m, i32 %evl) ret %v } @@ -322,6 +392,12 @@ ; CHECK-NEXT: vadd.vv v8, v8, v12 ; CHECK-NEXT: vand.vi v8, v8, 15 ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_cttz_nxv32i8_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-ZVBB-NEXT: vctz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.cttz.nxv32i8( %va, i1 false, %m, i32 %evl) @@ -351,6 +427,12 @@ ; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_cttz_nxv64i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.cttz.nxv64i8( %va, i1 false, %m, i32 %evl) ret %v } @@ -376,6 +458,12 @@ ; CHECK-NEXT: vadd.vv v8, v8, v16 ; CHECK-NEXT: vand.vi v8, v8, 15 ; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_cttz_nxv64i8_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-ZVBB-NEXT: vctz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.cttz.nxv64i8( %va, i1 false, %m, i32 %evl) @@ -440,6 +528,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_cttz_nxv1i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.cttz.nxv1i16( %va, i1 false, %m, i32 %evl) ret %v } @@ -500,6 +594,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_cttz_nxv1i16_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-ZVBB-NEXT: vctz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.cttz.nxv1i16( %va, i1 false, %m, i32 %evl) @@ -564,6 +664,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_cttz_nxv2i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.cttz.nxv2i16( %va, i1 false, %m, i32 %evl) ret %v } @@ -624,6 +730,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_cttz_nxv2i16_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-ZVBB-NEXT: vctz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.cttz.nxv2i16( %va, i1 false, %m, i32 %evl) @@ -688,6 +800,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_cttz_nxv4i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.cttz.nxv4i16( %va, i1 false, %m, i32 %evl) ret %v } @@ -748,6 +866,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_cttz_nxv4i16_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-ZVBB-NEXT: vctz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.cttz.nxv4i16( %va, i1 false, %m, i32 %evl) @@ -812,6 +936,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_cttz_nxv8i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.cttz.nxv8i16( %va, i1 false, %m, i32 %evl) ret %v } @@ -872,6 +1002,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_cttz_nxv8i16_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-ZVBB-NEXT: vctz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.cttz.nxv8i16( %va, i1 false, %m, i32 %evl) @@ -936,6 +1072,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_cttz_nxv16i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.cttz.nxv16i16( %va, i1 false, %m, i32 %evl) ret %v } @@ -996,6 +1138,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_cttz_nxv16i16_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-ZVBB-NEXT: vctz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.cttz.nxv16i16( %va, i1 false, %m, i32 %evl) @@ -1060,6 +1208,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_cttz_nxv32i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.cttz.nxv32i16( %va, i1 false, %m, i32 %evl) ret %v } @@ -1120,6 +1274,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 8 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_cttz_nxv32i16_unmasked: +; 
CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-ZVBB-NEXT: vctz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.cttz.nxv32i16( %va, i1 false, %m, i32 %evl) @@ -1186,6 +1346,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_cttz_nxv1i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.cttz.nxv1i32( %va, i1 false, %m, i32 %evl) ret %v } @@ -1248,6 +1414,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 24 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_cttz_nxv1i32_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-ZVBB-NEXT: vctz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.cttz.nxv1i32( %va, i1 false, %m, i32 %evl) @@ -1314,6 +1486,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_cttz_nxv2i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.cttz.nxv2i32( %va, i1 false, %m, i32 %evl) ret %v } @@ -1376,6 +1554,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 24 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_cttz_nxv2i32_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vctz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.cttz.nxv2i32( %va, i1 false, %m, i32 %evl) @@ -1442,6 +1626,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_cttz_nxv4i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.cttz.nxv4i32( %va, i1 false, %m, i32 %evl) ret %v } @@ -1504,6 +1694,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 24 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_cttz_nxv4i32_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-ZVBB-NEXT: vctz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.cttz.nxv4i32( %va, i1 false, %m, i32 %evl) @@ -1570,6 +1766,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_cttz_nxv8i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.cttz.nxv8i32( %va, i1 false, %m, i32 %evl) ret %v } @@ -1632,6 +1834,12 @@ ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 24 ; RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vp_cttz_nxv8i32_unmasked: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-ZVBB-NEXT: vctz.v v8, v8 +; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.cttz.nxv8i32( %va, i1 false, %m, i32 
@@ -1698,6 +1906,12 @@
 ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv16i32:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
 %v = call <vscale x 16 x i32> @llvm.vp.cttz.nxv16i32(<vscale x 16 x i32> %va, i1 false, <vscale x 16 x i1> %m, i32 %evl)
 ret <vscale x 16 x i32> %v
 }
@@ -1760,6 +1974,12 @@
 ; RV64-NEXT: vmul.vx v8, v8, a0
 ; RV64-NEXT: vsrl.vi v8, v8, 24
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv16i32_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
 %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
 %v = call <vscale x 16 x i32> @llvm.vp.cttz.nxv16i32(<vscale x 16 x i32> %va, i1 false, <vscale x 16 x i1> %m, i32 %evl)
@@ -1863,6 +2083,12 @@
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv1i64:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
 %v = call <vscale x 1 x i64> @llvm.vp.cttz.nxv1i64(<vscale x 1 x i64> %va, i1 false, <vscale x 1 x i1> %m, i32 %evl)
 ret <vscale x 1 x i64> %v
 }
@@ -1962,6 +2188,12 @@
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv1i64_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
 %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
 %v = call <vscale x 1 x i64> @llvm.vp.cttz.nxv1i64(<vscale x 1 x i64> %va, i1 false, <vscale x 1 x i1> %m, i32 %evl)
@@ -2065,6 +2297,12 @@
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv2i64:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
 %v = call <vscale x 2 x i64> @llvm.vp.cttz.nxv2i64(<vscale x 2 x i64> %va, i1 false, <vscale x 2 x i1> %m, i32 %evl)
 ret <vscale x 2 x i64> %v
 }
@@ -2164,6 +2402,12 @@
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv2i64_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
 %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
 %v = call <vscale x 2 x i64> @llvm.vp.cttz.nxv2i64(<vscale x 2 x i64> %va, i1 false, <vscale x 2 x i1> %m, i32 %evl)
@@ -2267,6 +2511,12 @@
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv4i64:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
 %v = call <vscale x 4 x i64> @llvm.vp.cttz.nxv4i64(<vscale x 4 x i64> %va, i1 false, <vscale x 4 x i1> %m, i32 %evl)
 ret <vscale x 4 x i64> %v
 }
@@ -2366,6 +2616,12 @@
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv4i64_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
 %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
 %v = call <vscale x 4 x i64> @llvm.vp.cttz.nxv4i64(<vscale x 4 x i64> %va, i1 false, <vscale x 4 x i1> %m, i32 %evl)
@@ -2469,6 +2725,12 @@
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv7i64:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
 %v = call <vscale x 7 x i64> @llvm.vp.cttz.nxv7i64(<vscale x 7 x i64> %va, i1 false, <vscale x 7 x i1> %m, i32 %evl)
 ret <vscale x 7 x i64> %v
 }
@@ -2568,6 +2830,12 @@
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv7i64_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
 %head = insertelement <vscale x 7 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 7 x i1> %head, <vscale x 7 x i1> poison, <vscale x 7 x i32> zeroinitializer
 %v = call <vscale x 7 x i64> @llvm.vp.cttz.nxv7i64(<vscale x 7 x i64> %va, i1 false, <vscale x 7 x i1> %m, i32 %evl)
@@ -2671,6 +2939,12 @@
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv8i64:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
 %v = call <vscale x 8 x i64> @llvm.vp.cttz.nxv8i64(<vscale x 8 x i64> %va, i1 false, <vscale x 8 x i1> %m, i32 %evl)
 ret <vscale x 8 x i64> %v
 }
@@ -2770,6 +3044,12 @@
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv8i64_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
 %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
 %v = call <vscale x 8 x i64> @llvm.vp.cttz.nxv8i64(<vscale x 8 x i64> %va, i1 false, <vscale x 8 x i1> %m, i32 %evl)
@@ -3057,6 +3337,28 @@
 ; RV64-NEXT: add sp, sp, a0
 ; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv16i64:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vmv1r.v v24, v0
+; CHECK-ZVBB-NEXT: csrr a1, vlenb
+; CHECK-ZVBB-NEXT: srli a2, a1, 3
+; CHECK-ZVBB-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
+; CHECK-ZVBB-NEXT: vslidedown.vx v0, v0, a2
+; CHECK-ZVBB-NEXT: sub a2, a0, a1
+; CHECK-ZVBB-NEXT: sltu a3, a0, a2
+; CHECK-ZVBB-NEXT: addi a3, a3, -1
+; CHECK-ZVBB-NEXT: and a2, a3, a2
+; CHECK-ZVBB-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v16, v16, v0.t
+; CHECK-ZVBB-NEXT: bltu a0, a1, .LBB46_2
+; CHECK-ZVBB-NEXT: # %bb.1:
+; CHECK-ZVBB-NEXT: mv a0, a1
+; CHECK-ZVBB-NEXT: .LBB46_2:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT: vmv1r.v v0, v24
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
 %v = call <vscale x 16 x i64> @llvm.vp.cttz.nxv16i64(<vscale x 16 x i64> %va, i1 false, <vscale x 16 x i1> %m, i32 %evl)
 ret <vscale x 16 x i64> %v
 }
@@ -3249,6 +3551,23 @@
 ; RV64-NEXT: vmul.vx v8, v8, a6
 ; RV64-NEXT: vsrl.vx v8, v8, a7
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv16i64_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: csrr a1, vlenb
+; CHECK-ZVBB-NEXT: sub a2, a0, a1
+; CHECK-ZVBB-NEXT: sltu a3, a0, a2
+; CHECK-ZVBB-NEXT: addi a3, a3, -1
+; CHECK-ZVBB-NEXT: and a2, a3, a2
+; CHECK-ZVBB-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v16, v16
+; CHECK-ZVBB-NEXT: bltu a0, a1, .LBB47_2
+; CHECK-ZVBB-NEXT: # %bb.1:
+; CHECK-ZVBB-NEXT: mv a0, a1
+; CHECK-ZVBB-NEXT: .LBB47_2:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
 %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
 %v = call <vscale x 16 x i64> @llvm.vp.cttz.nxv16i64(<vscale x 16 x i64> %va, i1 false, <vscale x 16 x i1> %m, i32 %evl)
@@ -3276,6 +3595,12 @@
 ; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
 ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i8:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
 %v = call <vscale x 1 x i8> @llvm.vp.cttz.nxv1i8(<vscale x 1 x i8> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
 ret <vscale x 1 x i8> %v
 }
@@ -3301,6 +3626,12 @@
 ; CHECK-NEXT: vadd.vv v8, v8, v9
 ; CHECK-NEXT: vand.vi v8, v8, 15
 ; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i8_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
 %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
 %v = call <vscale x 1 x i8> @llvm.vp.cttz.nxv1i8(<vscale x 1 x i8> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
@@ -3329,6 +3660,12 @@
 ; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
 ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i8:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
 %v = call <vscale x 2 x i8> @llvm.vp.cttz.nxv2i8(<vscale x 2 x i8> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
 ret <vscale x 2 x i8> %v
 }
@@ -3354,6 +3691,12 @@
 ; CHECK-NEXT: vadd.vv v8, v8, v9
 ; CHECK-NEXT: vand.vi v8, v8, 15
 ; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i8_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
 %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
 %v = call <vscale x 2 x i8> @llvm.vp.cttz.nxv2i8(<vscale x 2 x i8> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
@@ -3382,6 +3725,12 @@
 ; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
 ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i8:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
 %v = call <vscale x 4 x i8> @llvm.vp.cttz.nxv4i8(<vscale x 4 x i8> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
 ret <vscale x 4 x i8> %v
 }
@@ -3407,6 +3756,12 @@
 ; CHECK-NEXT: vadd.vv v8, v8, v9
 ; CHECK-NEXT: vand.vi v8, v8, 15
 ; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i8_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
 %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
 %v = call <vscale x 4 x i8> @llvm.vp.cttz.nxv4i8(<vscale x 4 x i8> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
@@ -3435,6 +3790,12 @@
 ; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
 ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i8:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
 %v = call <vscale x 8 x i8> @llvm.vp.cttz.nxv8i8(<vscale x 8 x i8> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
 ret <vscale x 8 x i8> %v
 }
@@ -3460,6 +3821,12 @@
 ; CHECK-NEXT: vadd.vv v8, v8, v9
 ; CHECK-NEXT: vand.vi v8, v8, 15
 ; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i8_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
 %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
 %v = call <vscale x 8 x i8> @llvm.vp.cttz.nxv8i8(<vscale x 8 x i8> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
@@ -3488,6 +3855,12 @@
 ; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t
 ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i8:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
 %v = call <vscale x 16 x i8> @llvm.vp.cttz.nxv16i8(<vscale x 16 x i8> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
 ret <vscale x 16 x i8> %v
 }
@@ -3513,6 +3886,12 @@
 ; CHECK-NEXT: vadd.vv v8, v8, v10
 ; CHECK-NEXT: vand.vi v8, v8, 15
 ; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i8_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
 %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
 %v = call <vscale x 16 x i8> @llvm.vp.cttz.nxv16i8(<vscale x 16 x i8> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
@@ -3541,6 +3920,12 @@
 ; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t
 ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv32i8:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
 %v = call <vscale x 32 x i8> @llvm.vp.cttz.nxv32i8(<vscale x 32 x i8> %va, i1 true, <vscale x 32 x i1> %m, i32 %evl)
 ret <vscale x 32 x i8> %v
 }
@@ -3566,6 +3951,12 @@
 ; CHECK-NEXT: vadd.vv v8, v8, v12
 ; CHECK-NEXT: vand.vi v8, v8, 15
 ; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv32i8_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
 %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
 %v = call <vscale x 32 x i8> @llvm.vp.cttz.nxv32i8(<vscale x 32 x i8> %va, i1 true, <vscale x 32 x i1> %m, i32 %evl)
@@ -3594,6 +3985,12 @@
 ; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t
 ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
 ; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv64i8:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
 %v = call <vscale x 64 x i8> @llvm.vp.cttz.nxv64i8(<vscale x 64 x i8> %va, i1 true, <vscale x 64 x i1> %m, i32 %evl)
 ret <vscale x 64 x i8> %v
 }
@@ -3619,6 +4016,12 @@
 ; CHECK-NEXT: vadd.vv v8, v8, v16
 ; CHECK-NEXT: vand.vi v8, v8, 15
 ; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv64i8_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
 %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
 %v = call <vscale x 64 x i8> @llvm.vp.cttz.nxv64i8(<vscale x 64 x i8> %va, i1 true, <vscale x 64 x i1> %m, i32 %evl)
@@ -3682,6 +4085,12 @@
 ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i16:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
 %v = call <vscale x 1 x i16> @llvm.vp.cttz.nxv1i16(<vscale x 1 x i16> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
 ret <vscale x 1 x i16> %v
 }
@@ -3742,6 +4151,12 @@
 ; RV64-NEXT: vmul.vx v8, v8, a0
 ; RV64-NEXT: vsrl.vi v8, v8, 8
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i16_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
 %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
 %v = call <vscale x 1 x i16> @llvm.vp.cttz.nxv1i16(<vscale x 1 x i16> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
@@ -3805,6 +4220,12 @@
 ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i16:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
 %v = call <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
 ret <vscale x 2 x i16> %v
 }
@@ -3865,6 +4286,12 @@
 ; RV64-NEXT: vmul.vx v8, v8, a0
 ; RV64-NEXT: vsrl.vi v8, v8, 8
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i16_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
 %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
 %v = call <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
@@ -3928,6 +4355,12 @@
 ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i16:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
 %v = call <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
 ret <vscale x 4 x i16> %v
 }
@@ -3988,6 +4421,12 @@
 ; RV64-NEXT: vmul.vx v8, v8, a0
 ; RV64-NEXT: vsrl.vi v8, v8, 8
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i16_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
 %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
 %v = call <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
@@ -4051,6 +4490,12 @@
 ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i16:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
 %v = call <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
 ret <vscale x 8 x i16> %v
 }
@@ -4111,6 +4556,12 @@
 ; RV64-NEXT: vmul.vx v8, v8, a0
 ; RV64-NEXT: vsrl.vi v8, v8, 8
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i16_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
 %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
 %v = call <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
@@ -4174,6 +4625,12 @@
 ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i16:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
 %v = call <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
 ret <vscale x 16 x i16> %v
 }
@@ -4234,6 +4691,12 @@
 ; RV64-NEXT: vmul.vx v8, v8, a0
 ; RV64-NEXT: vsrl.vi v8, v8, 8
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i16_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
 %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
 %v = call <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
@@ -4297,6 +4760,12 @@
 ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv32i16:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
 %v = call <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16> %va, i1 true, <vscale x 32 x i1> %m, i32 %evl)
 ret <vscale x 32 x i16> %v
 }
@@ -4357,6 +4826,12 @@
 ; RV64-NEXT: vmul.vx v8, v8, a0
 ; RV64-NEXT: vsrl.vi v8, v8, 8
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv32i16_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
 %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
 %v = call <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16> %va, i1 true, <vscale x 32 x i1> %m, i32 %evl)
@@ -4422,6 +4897,12 @@
 ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i32:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
 %v = call <vscale x 1 x i32> @llvm.vp.cttz.nxv1i32(<vscale x 1 x i32> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
 ret <vscale x 1 x i32> %v
 }
@@ -4484,6 +4965,12 @@
 ; RV64-NEXT: vmul.vx v8, v8, a0
 ; RV64-NEXT: vsrl.vi v8, v8, 24
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i32_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
 %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
 %v = call <vscale x 1 x i32> @llvm.vp.cttz.nxv1i32(<vscale x 1 x i32> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
@@ -4549,6 +5036,12 @@
 ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i32:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
 %v = call <vscale x 2 x i32> @llvm.vp.cttz.nxv2i32(<vscale x 2 x i32> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
 ret <vscale x 2 x i32> %v
 }
@@ -4611,6 +5104,12 @@
 ; RV64-NEXT: vmul.vx v8, v8, a0
 ; RV64-NEXT: vsrl.vi v8, v8, 24
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i32_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
 %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
 %v = call <vscale x 2 x i32> @llvm.vp.cttz.nxv2i32(<vscale x 2 x i32> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
@@ -4676,6 +5175,12 @@
 ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i32:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
 %v = call <vscale x 4 x i32> @llvm.vp.cttz.nxv4i32(<vscale x 4 x i32> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
 ret <vscale x 4 x i32> %v
 }
@@ -4738,6 +5243,12 @@
 ; RV64-NEXT: vmul.vx v8, v8, a0
 ; RV64-NEXT: vsrl.vi v8, v8, 24
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i32_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
 %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
 %v = call <vscale x 4 x i32> @llvm.vp.cttz.nxv4i32(<vscale x 4 x i32> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
@@ -4803,6 +5314,12 @@
 ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i32:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
 %v = call <vscale x 8 x i32> @llvm.vp.cttz.nxv8i32(<vscale x 8 x i32> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
 ret <vscale x 8 x i32> %v
 }
@@ -4865,6 +5382,12 @@
 ; RV64-NEXT: vmul.vx v8, v8, a0
 ; RV64-NEXT: vsrl.vi v8, v8, 24
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i32_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
 %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
 %v = call <vscale x 8 x i32> @llvm.vp.cttz.nxv8i32(<vscale x 8 x i32> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
@@ -4930,6 +5453,12 @@
 ; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
 ; RV64-NEXT: vsrl.vi v8, v8, 24, v0.t
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i32:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
 %v = call <vscale x 16 x i32> @llvm.vp.cttz.nxv16i32(<vscale x 16 x i32> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
 ret <vscale x 16 x i32> %v
 }
@@ -4992,6 +5521,12 @@
 ; RV64-NEXT: vmul.vx v8, v8, a0
 ; RV64-NEXT: vsrl.vi v8, v8, 24
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i32_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
 %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
 %v = call <vscale x 16 x i32> @llvm.vp.cttz.nxv16i32(<vscale x 16 x i32> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
@@ -5094,6 +5629,12 @@
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i64:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
 %v = call <vscale x 1 x i64> @llvm.vp.cttz.nxv1i64(<vscale x 1 x i64> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
 ret <vscale x 1 x i64> %v
 }
@@ -5193,6 +5734,12 @@
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i64_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
 %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
 %v = call <vscale x 1 x i64> @llvm.vp.cttz.nxv1i64(<vscale x 1 x i64> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
@@ -5295,6 +5842,12 @@
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i64:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
 %v = call <vscale x 2 x i64> @llvm.vp.cttz.nxv2i64(<vscale x 2 x i64> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
 ret <vscale x 2 x i64> %v
 }
@@ -5394,6 +5947,12 @@
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i64_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
 %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
 %v = call <vscale x 2 x i64> @llvm.vp.cttz.nxv2i64(<vscale x 2 x i64> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl)
@@ -5496,6 +6055,12 @@
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i64:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
 %v = call <vscale x 4 x i64> @llvm.vp.cttz.nxv4i64(<vscale x 4 x i64> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
 ret <vscale x 4 x i64> %v
 }
@@ -5595,6 +6160,12 @@
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i64_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
 %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
 %v = call <vscale x 4 x i64> @llvm.vp.cttz.nxv4i64(<vscale x 4 x i64> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl)
@@ -5697,6 +6268,12 @@
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv7i64:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
 %v = call <vscale x 7 x i64> @llvm.vp.cttz.nxv7i64(<vscale x 7 x i64> %va, i1 true, <vscale x 7 x i1> %m, i32 %evl)
 ret <vscale x 7 x i64> %v
 }
@@ -5796,6 +6373,12 @@
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv7i64_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
 %head = insertelement <vscale x 7 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 7 x i1> %head, <vscale x 7 x i1> poison, <vscale x 7 x i32> zeroinitializer
 %v = call <vscale x 7 x i64> @llvm.vp.cttz.nxv7i64(<vscale x 7 x i64> %va, i1 true, <vscale x 7 x i1> %m, i32 %evl)
@@ -5898,6 +6481,12 @@
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i64:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
 %v = call <vscale x 8 x i64> @llvm.vp.cttz.nxv8i64(<vscale x 8 x i64> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
 ret <vscale x 8 x i64> %v
 }
@@ -5997,6 +6586,12 @@
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i64_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
 %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
 %v = call <vscale x 8 x i64> @llvm.vp.cttz.nxv8i64(<vscale x 8 x i64> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl)
@@ -6282,6 +6877,28 @@
 ; RV64-NEXT: add sp, sp, a0
 ; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i64:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vmv1r.v v24, v0
+; CHECK-ZVBB-NEXT: csrr a1, vlenb
+; CHECK-ZVBB-NEXT: srli a2, a1, 3
+; CHECK-ZVBB-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
+; CHECK-ZVBB-NEXT: vslidedown.vx v0, v0, a2
+; CHECK-ZVBB-NEXT: sub a2, a0, a1
+; CHECK-ZVBB-NEXT: sltu a3, a0, a2
+; CHECK-ZVBB-NEXT: addi a3, a3, -1
+; CHECK-ZVBB-NEXT: and a2, a3, a2
+; CHECK-ZVBB-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v16, v16, v0.t
+; CHECK-ZVBB-NEXT: bltu a0, a1, .LBB94_2
+; CHECK-ZVBB-NEXT: # %bb.1:
+; CHECK-ZVBB-NEXT: mv a0, a1
+; CHECK-ZVBB-NEXT: .LBB94_2:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT: vmv1r.v v0, v24
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
 %v = call <vscale x 16 x i64> @llvm.vp.cttz.nxv16i64(<vscale x 16 x i64> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
 ret <vscale x 16 x i64> %v
 }
@@ -6474,6 +7091,23 @@
 ; RV64-NEXT: vmul.vx v8, v8, a6
 ; RV64-NEXT: vsrl.vx v8, v8, a7
 ; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i64_unmasked:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: csrr a1, vlenb
+; CHECK-ZVBB-NEXT: sub a2, a0, a1
+; CHECK-ZVBB-NEXT: sltu a3, a0, a2
+; CHECK-ZVBB-NEXT: addi a3, a3, -1
+; CHECK-ZVBB-NEXT: and a2, a3, a2
+; CHECK-ZVBB-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v16, v16
+; CHECK-ZVBB-NEXT: bltu a0, a1, .LBB95_2
+; CHECK-ZVBB-NEXT: # %bb.1:
+; CHECK-ZVBB-NEXT: mv a0, a1
+; CHECK-ZVBB-NEXT: .LBB95_2:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8
+; CHECK-ZVBB-NEXT: ret
 %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
 %v = call <vscale x 16 x i64> @llvm.vp.cttz.nxv16i64(<vscale x 16 x i64> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)