diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -543,6 +543,7 @@ SelectionDAG &DAG) const; SDValue lowerToScalableOp(SDValue Op, SelectionDAG &DAG, unsigned NewOpc, bool HasMask = true) const; + SDValue lowerVPOp(SDValue Op, SelectionDAG &DAG, unsigned RISCVISDOpc) const; SDValue lowerFixedLengthVectorExtendToRVV(SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const; SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -411,6 +411,11 @@ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); + static unsigned IntegerVPOps[] = { + ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL, ISD::VP_SDIV, ISD::VP_UDIV, + ISD::VP_SREM, ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, + ISD::VP_ASHR, ISD::VP_LSHR, ISD::VP_SHL}; + if (!Subtarget.is64Bit()) { // We must custom-lower certain vXi64 operations on RV32 due to the vector // element type being illegal. @@ -496,6 +501,13 @@ setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom); setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom); + for (unsigned VPOpc : IntegerVPOps) { + setOperationAction(VPOpc, VT, Custom); + // RV64 must custom-legalize the i32 EVL parameter. + if (Subtarget.is64Bit()) + setOperationAction(VPOpc, MVT::i32, Custom); + } + setOperationAction(ISD::MLOAD, VT, Custom); setOperationAction(ISD::MSTORE, VT, Custom); setOperationAction(ISD::MGATHER, VT, Custom); @@ -695,6 +707,13 @@ setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom); setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom); setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom); + + for (unsigned VPOpc : IntegerVPOps) { + setOperationAction(VPOpc, VT, Custom); + // RV64 must custom-legalize the i32 EVL parameter. + if (Subtarget.is64Bit()) + setOperationAction(VPOpc, MVT::i32, Custom); + } } for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) { @@ -2367,6 +2386,32 @@ return lowerGET_ROUNDING(Op, DAG); case ISD::SET_ROUNDING: return lowerSET_ROUNDING(Op, DAG); + case ISD::VP_ADD: + return lowerVPOp(Op, DAG, RISCVISD::ADD_VL); + case ISD::VP_SUB: + return lowerVPOp(Op, DAG, RISCVISD::SUB_VL); + case ISD::VP_MUL: + return lowerVPOp(Op, DAG, RISCVISD::MUL_VL); + case ISD::VP_SDIV: + return lowerVPOp(Op, DAG, RISCVISD::SDIV_VL); + case ISD::VP_UDIV: + return lowerVPOp(Op, DAG, RISCVISD::UDIV_VL); + case ISD::VP_SREM: + return lowerVPOp(Op, DAG, RISCVISD::SREM_VL); + case ISD::VP_UREM: + return lowerVPOp(Op, DAG, RISCVISD::UREM_VL); + case ISD::VP_AND: + return lowerVPOp(Op, DAG, RISCVISD::AND_VL); + case ISD::VP_OR: + return lowerVPOp(Op, DAG, RISCVISD::OR_VL); + case ISD::VP_XOR: + return lowerVPOp(Op, DAG, RISCVISD::XOR_VL); + case ISD::VP_ASHR: + return lowerVPOp(Op, DAG, RISCVISD::SRA_VL); + case ISD::VP_LSHR: + return lowerVPOp(Op, DAG, RISCVISD::SRL_VL); + case ISD::VP_SHL: + return lowerVPOp(Op, DAG, RISCVISD::SHL_VL); } } @@ -2828,12 +2873,18 @@ // legal equivalently-sized i8 type, so we can use that as a go-between. SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op, SelectionDAG &DAG) const { - SDValue SplatVal = Op.getOperand(0); - // All-zeros or all-ones splats are handled specially. 
-  if (isa<ConstantSDNode>(SplatVal))
-    return Op;
   SDLoc DL(Op);
   MVT VT = Op.getSimpleValueType();
+  SDValue SplatVal = Op.getOperand(0);
+  // All-zeros or all-ones splats are handled specially.
+  if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
+    SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
+    return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
+  }
+  if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
+    SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
+    return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
+  }
   MVT XLenVT = Subtarget.getXLenVT();
   assert(SplatVal.getValueType() == XLenVT &&
          "Unexpected type for i1 splat value");
@@ -4215,6 +4266,50 @@
   return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
 }
 
+// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
+// * Operands of each node are assumed to be in the same order.
+// * The EVL operand is promoted from i32 to i64 on RV64.
+// * Fixed-length vectors are converted to their scalable-vector container
+//   types.
+SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG,
+                                       unsigned RISCVISDOpc) const {
+  SDLoc DL(Op);
+  MVT VT = Op.getSimpleValueType();
+  Optional<unsigned> EVLIdx = ISD::getVPExplicitVectorLengthIdx(Op.getOpcode());
+
+  SmallVector<SDValue, 16> Ops;
+  MVT XLenVT = Subtarget.getXLenVT();
+
+  for (const auto &OpIdx : enumerate(Op->ops())) {
+    SDValue V = OpIdx.value();
+    if ((unsigned)OpIdx.index() == EVLIdx) {
+      Ops.push_back(DAG.getZExtOrTrunc(V, DL, XLenVT));
+      continue;
+    }
+    assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
+    // Pass through operands which aren't fixed-length vectors.
+    if (!V.getValueType().isFixedLengthVector()) {
+      Ops.push_back(V);
+      continue;
+    }
+    // "cast" fixed length vector to a scalable vector.
+    MVT OpVT = V.getSimpleValueType();
+    MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
+    assert(useRVVForFixedLengthVectorVT(OpVT) &&
+           "Only fixed length vectors are supported!");
+    Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
+  }
+
+  if (!VT.isFixedLengthVector())
+    return DAG.getNode(RISCVISDOpc, DL, VT, Ops);
+
+  MVT ContainerVT = getContainerForFixedLengthVector(VT);
+
+  SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops);
+
+  return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
+}
+
 // Custom lower MGATHER to a legalized form for RVV. It will then be matched to
 // a RVV indexed load. The RVV indexed load instructions only support the
 // "unsigned unscaled" addressing mode; indices are implicitly zero-extended or
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -28,6 +28,14 @@
 def rv32_splat_i64 : SDNode<"RISCVISD::SPLAT_VECTOR_I64", SDTSplatI64>;
 
+def SDT_RISCVVMSETCLR_VL : SDTypeProfile<1, 1, [SDTCVecEltisVT<0, i1>,
+                                                SDTCisVT<1, XLenVT>]>;
+def riscv_vmclr_vl : SDNode<"RISCVISD::VMCLR_VL", SDT_RISCVVMSETCLR_VL>;
+def riscv_vmset_vl : SDNode<"RISCVISD::VMSET_VL", SDT_RISCVVMSETCLR_VL>;
+
+def rvv_vnot : PatFrag<(ops node:$in),
+                       (xor node:$in, (riscv_vmset_vl (XLenVT srcvalue)))>;
+
 // Give explicit Complexity to prefer simm5/uimm5.
 def SplatPat : ComplexPattern;
 def SplatPat_simm5 : ComplexPattern;
@@ -503,25 +511,25 @@
             (!cast<Instruction>("PseudoVMXOR_MM_"#mti.LMul.MX)
                  VR:$rs1, VR:$rs2, mti.AVL, mti.Log2SEW)>;
 
-  def : Pat<(mti.Mask (vnot (and VR:$rs1, VR:$rs2))),
+  def : Pat<(mti.Mask (rvv_vnot (and VR:$rs1, VR:$rs2))),
             (!cast<Instruction>("PseudoVMNAND_MM_"#mti.LMul.MX)
                  VR:$rs1, VR:$rs2, mti.AVL, mti.Log2SEW)>;
-  def : Pat<(mti.Mask (vnot (or VR:$rs1, VR:$rs2))),
+  def : Pat<(mti.Mask (rvv_vnot (or VR:$rs1, VR:$rs2))),
             (!cast<Instruction>("PseudoVMNOR_MM_"#mti.LMul.MX)
                  VR:$rs1, VR:$rs2, mti.AVL, mti.Log2SEW)>;
-  def : Pat<(mti.Mask (vnot (xor VR:$rs1, VR:$rs2))),
+  def : Pat<(mti.Mask (rvv_vnot (xor VR:$rs1, VR:$rs2))),
             (!cast<Instruction>("PseudoVMXNOR_MM_"#mti.LMul.MX)
                  VR:$rs1, VR:$rs2, mti.AVL, mti.Log2SEW)>;
 
-  def : Pat<(mti.Mask (and VR:$rs1, (vnot VR:$rs2))),
+  def : Pat<(mti.Mask (and VR:$rs1, (rvv_vnot VR:$rs2))),
             (!cast<Instruction>("PseudoVMANDNOT_MM_"#mti.LMul.MX)
                  VR:$rs1, VR:$rs2, mti.AVL, mti.Log2SEW)>;
-  def : Pat<(mti.Mask (or VR:$rs1, (vnot VR:$rs2))),
+  def : Pat<(mti.Mask (or VR:$rs1, (rvv_vnot VR:$rs2))),
             (!cast<Instruction>("PseudoVMORNOT_MM_"#mti.LMul.MX)
                  VR:$rs1, VR:$rs2, mti.AVL, mti.Log2SEW)>;
 
-  // Handle vnot the same as the vnot.mm pseudoinstruction.
-  def : Pat<(mti.Mask (vnot VR:$rs)),
+  // Handle rvv_vnot the same as the vnot.mm pseudoinstruction.
+  def : Pat<(mti.Mask (rvv_vnot VR:$rs)),
             (!cast<Instruction>("PseudoVMNAND_MM_"#mti.LMul.MX)
                  VR:$rs, VR:$rs, mti.AVL, mti.Log2SEW)>;
 }
@@ -725,13 +733,6 @@
             (!cast<Instruction>("PseudoVMV_V_I_" # vti.LMul.MX)
                  simm5:$rs1, vti.AVL, vti.Log2SEW)>;
 }
-
-foreach mti = AllMasks in {
-  def : Pat<(mti.Mask immAllOnesV),
-            (!cast<Instruction>("PseudoVMSET_M_"#mti.BX) mti.AVL, mti.Log2SEW)>;
-  def : Pat<(mti.Mask immAllZerosV),
-            (!cast<Instruction>("PseudoVMCLR_M_"#mti.BX) mti.AVL, mti.Log2SEW)>;
-}
 } // Predicates = [HasStdExtV]
 
 let Predicates = [HasStdExtV, HasStdExtF] in {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -188,11 +188,6 @@
 def riscv_vmor_vl : SDNode<"RISCVISD::VMOR_VL", SDT_RISCVMaskBinOp_VL, [SDNPCommutative]>;
 def riscv_vmxor_vl : SDNode<"RISCVISD::VMXOR_VL", SDT_RISCVMaskBinOp_VL, [SDNPCommutative]>;
 
-def SDT_RISCVVMSETCLR_VL : SDTypeProfile<1, 1, [SDTCVecEltisVT<0, i1>,
-                                                SDTCisVT<1, XLenVT>]>;
-def riscv_vmclr_vl : SDNode<"RISCVISD::VMCLR_VL", SDT_RISCVVMSETCLR_VL>;
-def riscv_vmset_vl : SDNode<"RISCVISD::VMSET_VL", SDT_RISCVVMSETCLR_VL>;
-
 def true_mask : PatLeaf<(riscv_vmset_vl (XLenVT srcvalue))>;
 
 def riscv_vmnot_vl : PatFrag<(ops node:$rs, node:$vl),
@@ -243,38 +238,49 @@
 def sew32uimm5 : ComplexPattern", []>;
 def sew64uimm5 : ComplexPattern", []>;
 
-class VPatBinaryVL_VV :
-    Pat<(result_type (vop
-                      (op_type op_reg_class:$rs1),
-                      (op_type op_reg_class:$rs2),
-                      (mask_type true_mask),
-                      VLOpFrag)),
-        (!cast<Instruction>(instruction_name#"_VV_"# vlmul.MX)
+multiclass VPatBinaryVL_VV {
+  def : Pat<(result_type (vop
+                          (op_type op_reg_class:$rs1),
+                          (op_type op_reg_class:$rs2),
+                          (mask_type true_mask),
+                          VLOpFrag)),
+            (!cast<Instruction>(instruction_name#"_VV_"# vlmul.MX)
+                op_reg_class:$rs1,
+                op_reg_class:$rs2,
+                GPR:$vl, sew)>;
+  def : Pat<(result_type (vop
+                          (op_type op_reg_class:$rs1),
+                          (op_type op_reg_class:$rs2),
+                          (mask_type VMV0:$vm),
+                          VLOpFrag)),
+            (!cast<Instruction>(instruction_name#"_VV_"# vlmul.MX#"_MASK")
+                (op_type (IMPLICIT_DEF)),
                 op_reg_class:$rs1,
                 op_reg_class:$rs2,
-                GPR:$vl, sew)>;
+                VMV0:$vm, GPR:$vl, sew)>;
+}
 
-class VPatBinaryVL_XI :
-    Pat<(result_type (vop
+multiclass VPatBinaryVL_XI {
+ 
def : Pat<(result_type (vop (vop_type vop_reg_class:$rs1), (vop_type (SplatPatKind (XLenVT xop_kind:$rs2))), (mask_type true_mask), @@ -283,34 +289,45 @@ vop_reg_class:$rs1, xop_kind:$rs2, GPR:$vl, sew)>; + def : Pat<(result_type (vop + (vop_type vop_reg_class:$rs1), + (vop_type (SplatPatKind (XLenVT xop_kind:$rs2))), + (mask_type VMV0:$vm), + VLOpFrag)), + (!cast(instruction_name#_#suffix#_# vlmul.MX#"_MASK") + (vop_type (IMPLICIT_DEF)), + vop_reg_class:$rs1, + xop_kind:$rs2, + VMV0:$vm, GPR:$vl, sew)>; +} multiclass VPatBinaryVL_VV_VX { foreach vti = AllIntegerVectors in { - def : VPatBinaryVL_VV; - def : VPatBinaryVL_XI; + defm : VPatBinaryVL_VV; + defm : VPatBinaryVL_XI; } } multiclass VPatBinaryVL_VV_VX_VI { foreach vti = AllIntegerVectors in { - def : VPatBinaryVL_VV; - def : VPatBinaryVL_XI; - def : VPatBinaryVL_XI(SplatPat#_#ImmType), - ImmType>; + defm : VPatBinaryVL_VV; + defm : VPatBinaryVL_XI; + defm : VPatBinaryVL_XI(SplatPat#_#ImmType), + ImmType>; } } @@ -335,9 +352,9 @@ multiclass VPatBinaryFPVL_VV_VF { foreach vti = AllFloatVectors in { - def : VPatBinaryVL_VV; + defm : VPatBinaryVL_VV; def : VPatBinaryVL_VF("PseudoVRSUB_VX_"# vti.LMul.MX) vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>; + def : Pat<(riscv_sub_vl (vti.Vector (SplatPat (XLenVT GPR:$rs2))), + (vti.Vector vti.RegClass:$rs1), (vti.Mask VMV0:$vm), + VLOpFrag), + (!cast("PseudoVRSUB_VX_"# vti.LMul.MX#"_MASK") + (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, GPR:$rs2, + VMV0:$vm, GPR:$vl, vti.Log2SEW)>; def : Pat<(riscv_sub_vl (vti.Vector (SplatPat_simm5 simm5:$rs2)), (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask), VLOpFrag), (!cast("PseudoVRSUB_VI_"# vti.LMul.MX) vti.RegClass:$rs1, simm5:$rs2, GPR:$vl, vti.Log2SEW)>; + def : Pat<(riscv_sub_vl (vti.Vector (SplatPat_simm5 simm5:$rs2)), + (vti.Vector vti.RegClass:$rs1), (vti.Mask VMV0:$vm), + VLOpFrag), + (!cast("PseudoVRSUB_VI_"# vti.LMul.MX#"_MASK") + (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, simm5:$rs2, + VMV0:$vm, GPR:$vl, vti.Log2SEW)>; } // 12.3. Vector Integer Extension diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -131,6 +131,14 @@ bool isLegalMaskedScatter(Type *DataType, Align Alignment) { return isLegalMaskedGatherScatter(DataType, Alignment); } + + /// \returns How the target needs this vector-predicated operation to be + /// transformed. 
+ TargetTransformInfo::VPLegalization + getVPLegalizationStrategy(const VPIntrinsic &PI) const { + using VPLegalization = TargetTransformInfo::VPLegalization; + return VPLegalization(VPLegalization::Legal, VPLegalization::Legal); + } }; } // end namespace llvm diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll @@ -0,0 +1,1333 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 + +declare <2 x i8> @llvm.vp.add.v2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32) + +define <2 x i8> @vadd_vv_v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <2 x i8> @llvm.vp.add.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vadd_vv_v2i8_unmasked(<2 x i8> %va, <2 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_v2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x i1> undef, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.add.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vadd_vx_v2i8(<2 x i8> %va, i8 %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.add.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vadd_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_v2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> undef, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.add.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vadd_vi_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> undef, i8 -1, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.add.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vadd_vi_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_v2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: 
vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> undef, i8 -1, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> undef, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.add.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +declare <4 x i8> @llvm.vp.add.v4i8(<4 x i8>, <4 x i8>, <4 x i1>, i32) + +define <4 x i8> @vadd_vv_v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <4 x i8> @llvm.vp.add.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vadd_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_v4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <4 x i1> undef, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.add.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vadd_vx_v4i8(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> undef, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.add.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vadd_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_v4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> undef, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> undef, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.add.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vadd_vi_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> undef, i8 -1, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> undef, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.add.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vadd_vi_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_v4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> undef, i8 -1, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> undef, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> undef, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.add.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x 
i1> %m, i32 %evl) + ret <4 x i8> %v +} + +declare <8 x i8> @llvm.vp.add.v8i8(<8 x i8>, <8 x i8>, <8 x i1>, i32) + +define <8 x i8> @vadd_vv_v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <8 x i8> @llvm.vp.add.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vadd_vv_v8i8_unmasked(<8 x i8> %va, <8 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_v8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <8 x i1> undef, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.add.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vadd_vx_v8i8(<8 x i8> %va, i8 %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> undef, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.add.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vadd_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_v8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> undef, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> undef, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.add.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vadd_vi_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> undef, i8 -1, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> undef, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.add.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vadd_vi_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_v8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> undef, i8 -1, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> undef, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> undef, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.add.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +declare <16 x i8> @llvm.vp.add.v16i8(<16 x i8>, <16 x i8>, <16 x i1>, i32) + +define <16 x i8> @vadd_vv_v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <16 x i8> @llvm.vp.add.v16i8(<16 x i8> %va, <16 x 
i8> %b, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vadd_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_v16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <16 x i1> undef, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.add.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vadd_vx_v16i8(<16 x i8> %va, i8 %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> undef, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.add.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vadd_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_v16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> undef, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> undef, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.add.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vadd_vi_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> undef, i8 -1, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> undef, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.add.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vadd_vi_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_v16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> undef, i8 -1, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> undef, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> undef, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.add.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +declare <2 x i16> @llvm.vp.add.v2i16(<2 x i16>, <2 x i16>, <2 x i1>, i32) + +define <2 x i16> @vadd_vv_v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <2 x i16> @llvm.vp.add.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vadd_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_v2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x 
i1> undef, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.add.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vadd_vx_v2i16(<2 x i16> %va, i16 %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> undef, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.add.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vadd_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_v2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> undef, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> undef, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.add.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vadd_vi_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> undef, i16 -1, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> undef, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.add.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vadd_vi_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_v2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> undef, i16 -1, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> undef, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> undef, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.add.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +declare <4 x i16> @llvm.vp.add.v4i16(<4 x i16>, <4 x i16>, <4 x i1>, i32) + +define <4 x i16> @vadd_vv_v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <4 x i16> @llvm.vp.add.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vadd_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_v4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <4 x i1> undef, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.add.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vadd_vx_v4i16(<4 x i16> %va, i16 %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_v4i16: +; 
CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> undef, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.add.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vadd_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_v4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> undef, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> undef, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.add.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vadd_vi_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> undef, i16 -1, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> undef, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.add.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vadd_vi_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_v4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> undef, i16 -1, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> undef, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> undef, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.add.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +declare <8 x i16> @llvm.vp.add.v8i16(<8 x i16>, <8 x i16>, <8 x i1>, i32) + +define <8 x i16> @vadd_vv_v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <8 x i16> @llvm.vp.add.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vadd_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_v8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <8 x i1> undef, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.add.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vadd_vx_v8i16(<8 x i16> %va, i16 %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> undef, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.add.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> 
%m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vadd_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_v8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> undef, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> undef, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.add.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vadd_vi_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> undef, i16 -1, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> undef, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.add.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vadd_vi_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_v8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> undef, i16 -1, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> undef, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> undef, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.add.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +declare <16 x i16> @llvm.vp.add.v16i16(<16 x i16>, <16 x i16>, <16 x i1>, i32) + +define <16 x i16> @vadd_vv_v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <16 x i16> @llvm.vp.add.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vadd_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_v16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <16 x i1> undef, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.add.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vadd_vx_v16i16(<16 x i16> %va, i16 %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> undef, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.add.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vadd_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_v16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head 
= insertelement <16 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> undef, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> undef, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.add.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vadd_vi_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> undef, i16 -1, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> undef, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.add.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vadd_vi_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_v16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> undef, i16 -1, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> undef, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> undef, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.add.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +declare <2 x i32> @llvm.vp.add.v2i32(<2 x i32>, <2 x i32>, <2 x i1>, i32) + +define <2 x i32> @vadd_vv_v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vadd_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_v2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x i1> undef, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vadd_vx_v2i32(<2 x i32> %va, i32 %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> undef, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vadd_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_v2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> undef, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> undef, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer + %v = call <2 x i32> 
@llvm.vp.add.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vadd_vi_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> undef, i32 -1, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> undef, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vadd_vi_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_v2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> undef, i32 -1, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> undef, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> undef, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +declare <4 x i32> @llvm.vp.add.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32) + +define <4 x i32> @vadd_vv_v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vadd_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_v4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <4 x i1> undef, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vadd_vx_v4i32(<4 x i32> %va, i32 %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> undef, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vadd_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_v4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> undef, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> undef, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vadd_vi_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = 
insertelement <4 x i32> undef, i32 -1, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> undef, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vadd_vi_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_v4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> undef, i32 -1, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> undef, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> undef, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +declare <8 x i32> @llvm.vp.add.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) + +define <8 x i32> @vadd_vv_v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vadd_vv_v8i32_unmasked(<8 x i32> %va, <8 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_v8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <8 x i1> undef, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vadd_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> undef, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vadd_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_v8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> undef, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> undef, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vadd_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> undef, i32 -1, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> undef, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vadd_vi_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: 
vadd_vi_v8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> undef, i32 -1, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> undef, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> undef, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +declare <16 x i32> @llvm.vp.add.v16i32(<16 x i32>, <16 x i32>, <16 x i1>, i32) + +define <16 x i32> @vadd_vv_v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call <16 x i32> @llvm.vp.add.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vadd_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_v16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement <16 x i1> undef, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.add.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vadd_vx_v16i32(<16 x i32> %va, i32 %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m4,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> undef, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.add.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vadd_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_v16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m4,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> undef, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> undef, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.add.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vadd_vi_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> undef, i32 -1, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> undef, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.add.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vadd_vi_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_v16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> undef, i32 -1, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> 
undef, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> undef, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.add.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +declare <2 x i64> @llvm.vp.add.v2i64(<2 x i64>, <2 x i64>, <2 x i1>, i32) + +define <2 x i64> @vadd_vv_v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <2 x i64> @llvm.vp.add.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vadd_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_v2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x i1> undef, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.add.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vadd_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vadd_vx_v2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v25, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; RV32-NEXT: vadd.vv v8, v8, v25, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vadd_vx_v2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; RV64-NEXT: vadd.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <2 x i64> undef, i64 %b, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> undef, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.add.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vadd_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vadd_vx_v2i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v25, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; RV32-NEXT: vadd.vv v8, v8, v25 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vadd_vx_v2i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; RV64-NEXT: vadd.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <2 x i64> undef, i64 %b, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> undef, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> undef, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.add.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vadd_vi_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i64> undef, i64 -1, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> 
undef, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.add.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vadd_vi_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_v2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i64> undef, i64 -1, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> undef, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> undef, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.add.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +declare <4 x i64> @llvm.vp.add.v4i64(<4 x i64>, <4 x i64>, <4 x i1>, i32) + +define <4 x i64> @vadd_vv_v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <4 x i64> @llvm.vp.add.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vadd_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_v4i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <4 x i1> undef, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.add.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vadd_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vadd_vx_v4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v26, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; RV32-NEXT: vadd.vv v8, v8, v26, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vadd_vx_v4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m2,ta,mu +; RV64-NEXT: vadd.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <4 x i64> undef, i64 %b, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> undef, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.add.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vadd_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vadd_vx_v4i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v26, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; RV32-NEXT: vadd.vv v8, v8, v26 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vadd_vx_v4i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m2,ta,mu +; RV64-NEXT: vadd.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <4 x i64> undef, i64 %b, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> undef, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> undef, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x 
i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.add.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vadd_vi_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i64> undef, i64 -1, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> undef, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.add.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vadd_vi_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_v4i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i64> undef, i64 -1, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> undef, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> undef, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.add.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +declare <8 x i64> @llvm.vp.add.v8i64(<8 x i64>, <8 x i64>, <8 x i1>, i32) + +define <8 x i64> @vadd_vv_v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call <8 x i64> @llvm.vp.add.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vadd_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_v8i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement <8 x i1> undef, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.add.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vadd_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vadd_vx_v8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v28, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; RV32-NEXT: vadd.vv v8, v8, v28, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vadd_vx_v8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m4,ta,mu +; RV64-NEXT: vadd.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <8 x i64> undef, i64 %b, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> undef, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.add.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vadd_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vadd_vx_v8i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v28, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; 
RV32-NEXT: vadd.vv v8, v8, v28 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vadd_vx_v8i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m4,ta,mu +; RV64-NEXT: vadd.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <8 x i64> undef, i64 %b, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> undef, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> undef, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.add.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vadd_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i64> undef, i64 -1, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> undef, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.add.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vadd_vi_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_v8i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i64> undef, i64 -1, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> undef, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> undef, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.add.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +declare <16 x i64> @llvm.vp.add.v16i64(<16 x i64>, <16 x i64>, <16 x i1>, i32) + +define <16 x i64> @vadd_vv_v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_v16i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call <16 x i64> @llvm.vp.add.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vadd_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_v16i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement <16 x i1> undef, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.add.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vadd_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vadd_vx_v16i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vadd_vx_v16i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m8,ta,mu +; RV64-NEXT: vadd.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <16 x i64> undef, i64 %b, i32 0 + %vb = shufflevector <16 x i64> %elt.head, <16 x i64> undef, <16 x i32> zeroinitializer + %v = call 
<16 x i64> @llvm.vp.add.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vadd_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vadd_vx_v16i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v16, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m8,ta,mu
+; RV32-NEXT: vadd.vv v8, v8, v16
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vadd_vx_v16i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m8,ta,mu
+; RV64-NEXT: vadd.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <16 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <16 x i64> %elt.head, <16 x i64> undef, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.add.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vadd_vi_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_v16i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i64> undef, i64 -1, i32 0
+ %vb = shufflevector <16 x i64> %elt.head, <16 x i64> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.add.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vadd_vi_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_v16i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i64> undef, i64 -1, i32 0
+ %vb = shufflevector <16 x i64> %elt.head, <16 x i64> undef, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.add.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrsub-vp.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrsub-vp.ll
@@ -0,0 +1,981 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV64
+
+declare <2 x i8> @llvm.vp.sub.v2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32)
+
+define <2 x i8> @vrsub_vx_v2i8(<2 x i8> %va, i8 %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i8> @llvm.vp.sub.v2i8(<2 x i8> %vb, <2 x i8> %va, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+define 
<2 x i8> @vrsub_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_v2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> undef, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.sub.v2i8(<2 x i8> %vb, <2 x i8> %va, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vrsub_vi_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> undef, i8 2, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.sub.v2i8(<2 x i8> %vb, <2 x i8> %va, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vrsub_vi_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_v2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> undef, i8 2, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> undef, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.sub.v2i8(<2 x i8> %vb, <2 x i8> %va, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +declare <4 x i8> @llvm.vp.sub.v4i8(<4 x i8>, <4 x i8>, <4 x i1>, i32) + +define <4 x i8> @vrsub_vx_v4i8(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> undef, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.sub.v4i8(<4 x i8> %vb, <4 x i8> %va, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vrsub_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_v4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> undef, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> undef, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.sub.v4i8(<4 x i8> %vb, <4 x i8> %va, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vrsub_vi_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> undef, i8 2, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> undef, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.sub.v4i8(<4 x i8> %vb, <4 x i8> %va, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vrsub_vi_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_v4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli 
a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> undef, i8 2, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> undef, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> undef, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.sub.v4i8(<4 x i8> %vb, <4 x i8> %va, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +declare <8 x i8> @llvm.vp.sub.v8i8(<8 x i8>, <8 x i8>, <8 x i1>, i32) + +define <8 x i8> @vrsub_vx_v8i8(<8 x i8> %va, i8 %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> undef, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.sub.v8i8(<8 x i8> %vb, <8 x i8> %va, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vrsub_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_v8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> undef, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> undef, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.sub.v8i8(<8 x i8> %vb, <8 x i8> %va, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vrsub_vi_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> undef, i8 2, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> undef, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.sub.v8i8(<8 x i8> %vb, <8 x i8> %va, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vrsub_vi_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_v8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> undef, i8 2, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> undef, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> undef, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.sub.v8i8(<8 x i8> %vb, <8 x i8> %va, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +declare <16 x i8> @llvm.vp.sub.v16i8(<16 x i8>, <16 x i8>, <16 x i1>, i32) + +define <16 x i8> @vrsub_vx_v16i8(<16 x i8> %va, i8 %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> undef, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.sub.v16i8(<16 x i8> %vb, <16 x i8> %va, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vrsub_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_v16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vrsub.vx v8, 
v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> undef, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> undef, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.sub.v16i8(<16 x i8> %vb, <16 x i8> %va, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vrsub_vi_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> undef, i8 2, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> undef, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.sub.v16i8(<16 x i8> %vb, <16 x i8> %va, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vrsub_vi_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_v16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> undef, i8 2, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> undef, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> undef, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.sub.v16i8(<16 x i8> %vb, <16 x i8> %va, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +declare <2 x i16> @llvm.vp.sub.v2i16(<2 x i16>, <2 x i16>, <2 x i1>, i32) + +define <2 x i16> @vrsub_vx_v2i16(<2 x i16> %va, i16 %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> undef, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.sub.v2i16(<2 x i16> %vb, <2 x i16> %va, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vrsub_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_v2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> undef, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> undef, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.sub.v2i16(<2 x i16> %vb, <2 x i16> %va, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vrsub_vi_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> undef, i16 2, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> undef, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.sub.v2i16(<2 x i16> %vb, <2 x i16> %va, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vrsub_vi_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_v2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x 
i16> undef, i16 2, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> undef, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> undef, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.sub.v2i16(<2 x i16> %vb, <2 x i16> %va, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +declare <4 x i16> @llvm.vp.sub.v4i16(<4 x i16>, <4 x i16>, <4 x i1>, i32) + +define <4 x i16> @vrsub_vx_v4i16(<4 x i16> %va, i16 %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> undef, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.sub.v4i16(<4 x i16> %vb, <4 x i16> %va, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vrsub_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_v4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> undef, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> undef, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.sub.v4i16(<4 x i16> %vb, <4 x i16> %va, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vrsub_vi_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> undef, i16 2, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> undef, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.sub.v4i16(<4 x i16> %vb, <4 x i16> %va, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vrsub_vi_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_v4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> undef, i16 2, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> undef, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> undef, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.sub.v4i16(<4 x i16> %vb, <4 x i16> %va, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +declare <8 x i16> @llvm.vp.sub.v8i16(<8 x i16>, <8 x i16>, <8 x i1>, i32) + +define <8 x i16> @vrsub_vx_v8i16(<8 x i16> %va, i16 %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> undef, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.sub.v8i16(<8 x i16> %vb, <8 x i16> %va, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vrsub_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_v8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret + 
%elt.head = insertelement <8 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> undef, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> undef, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.sub.v8i16(<8 x i16> %vb, <8 x i16> %va, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vrsub_vi_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> undef, i16 2, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> undef, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.sub.v8i16(<8 x i16> %vb, <8 x i16> %va, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vrsub_vi_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_v8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> undef, i16 2, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> undef, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> undef, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.sub.v8i16(<8 x i16> %vb, <8 x i16> %va, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +declare <16 x i16> @llvm.vp.sub.v16i16(<16 x i16>, <16 x i16>, <16 x i1>, i32) + +define <16 x i16> @vrsub_vx_v16i16(<16 x i16> %va, i16 %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> undef, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.sub.v16i16(<16 x i16> %vb, <16 x i16> %va, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vrsub_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_v16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> undef, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> undef, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.sub.v16i16(<16 x i16> %vb, <16 x i16> %va, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vrsub_vi_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> undef, i16 2, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> undef, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.sub.v16i16(<16 x i16> %vb, <16 x i16> %va, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vrsub_vi_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_v16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2 +; CHECK-NEXT: ret + %elt.head = 
insertelement <16 x i16> undef, i16 2, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> undef, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> undef, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.sub.v16i16(<16 x i16> %vb, <16 x i16> %va, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +declare <2 x i32> @llvm.vp.sub.v2i32(<2 x i32>, <2 x i32>, <2 x i1>, i32) + +define <2 x i32> @vrsub_vx_v2i32(<2 x i32> %va, i32 %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> undef, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.sub.v2i32(<2 x i32> %vb, <2 x i32> %va, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vrsub_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_v2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> undef, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> undef, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.sub.v2i32(<2 x i32> %vb, <2 x i32> %va, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vrsub_vi_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> undef, i32 2, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> undef, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.sub.v2i32(<2 x i32> %vb, <2 x i32> %va, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vrsub_vi_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_v2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> undef, i32 2, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> undef, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> undef, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.sub.v2i32(<2 x i32> %vb, <2 x i32> %va, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +declare <4 x i32> @llvm.vp.sub.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32) + +define <4 x i32> @vrsub_vx_v4i32(<4 x i32> %va, i32 %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> undef, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> %vb, <4 x i32> %va, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vrsub_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_v4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vrsub.vx v8, 
v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> undef, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> undef, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> %vb, <4 x i32> %va, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vrsub_vi_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> undef, i32 2, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> undef, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> %vb, <4 x i32> %va, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vrsub_vi_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_v4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> undef, i32 2, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> undef, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> undef, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> %vb, <4 x i32> %va, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +declare <8 x i32> @llvm.vp.sub.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) + +define <8 x i32> @vrsub_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> undef, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %vb, <8 x i32> %va, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vrsub_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_v8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> undef, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> undef, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %vb, <8 x i32> %va, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vrsub_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> undef, i32 2, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> undef, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %vb, <8 x i32> %va, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vrsub_vi_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_v8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> 
undef, i32 2, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> undef, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> undef, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %vb, <8 x i32> %va, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +declare <16 x i32> @llvm.vp.sub.v16i32(<16 x i32>, <16 x i32>, <16 x i1>, i32) + +define <16 x i32> @vrsub_vx_v16i32(<16 x i32> %va, i32 %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m4,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> undef, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.sub.v16i32(<16 x i32> %vb, <16 x i32> %va, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vrsub_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_v16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m4,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> undef, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> undef, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.sub.v16i32(<16 x i32> %vb, <16 x i32> %va, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vrsub_vi_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> undef, i32 2, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> undef, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.sub.v16i32(<16 x i32> %vb, <16 x i32> %va, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vrsub_vi_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_v16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> undef, i32 2, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> undef, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> undef, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.sub.v16i32(<16 x i32> %vb, <16 x i32> %va, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +declare <2 x i64> @llvm.vp.sub.v2i64(<2 x i64>, <2 x i64>, <2 x i1>, i32) + +define <2 x i64> @vrsub_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vrsub_vx_v2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v25, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; RV32-NEXT: vsub.vv v8, v25, v8, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vrsub_vx_v2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; RV64-NEXT: vrsub.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <2 x 
i64> undef, i64 %b, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> undef, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.sub.v2i64(<2 x i64> %vb, <2 x i64> %va, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vrsub_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vrsub_vx_v2i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v25, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; RV32-NEXT: vsub.vv v8, v25, v8 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vrsub_vx_v2i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; RV64-NEXT: vrsub.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <2 x i64> undef, i64 %b, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> undef, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> undef, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.sub.v2i64(<2 x i64> %vb, <2 x i64> %va, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vrsub_vi_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i64> undef, i64 2, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> undef, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.sub.v2i64(<2 x i64> %vb, <2 x i64> %va, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vrsub_vi_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_v2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i64> undef, i64 2, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> undef, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> undef, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.sub.v2i64(<2 x i64> %vb, <2 x i64> %va, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +declare <4 x i64> @llvm.vp.sub.v4i64(<4 x i64>, <4 x i64>, <4 x i1>, i32) + +define <4 x i64> @vrsub_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vrsub_vx_v4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v26, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; RV32-NEXT: vsub.vv v8, v26, v8, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vrsub_vx_v4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m2,ta,mu +; RV64-NEXT: vrsub.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <4 x i64> undef, i64 %b, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> undef, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.sub.v4i64(<4 x i64> %vb, <4 x i64> %va, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vrsub_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vrsub_vx_v4i64_unmasked: +; RV32: # 
%bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v26, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; RV32-NEXT: vsub.vv v8, v26, v8 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vrsub_vx_v4i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m2,ta,mu +; RV64-NEXT: vrsub.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <4 x i64> undef, i64 %b, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> undef, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> undef, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.sub.v4i64(<4 x i64> %vb, <4 x i64> %va, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vrsub_vi_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i64> undef, i64 2, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> undef, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.sub.v4i64(<4 x i64> %vb, <4 x i64> %va, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vrsub_vi_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_v4i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i64> undef, i64 2, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> undef, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> undef, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.sub.v4i64(<4 x i64> %vb, <4 x i64> %va, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +declare <8 x i64> @llvm.vp.sub.v8i64(<8 x i64>, <8 x i64>, <8 x i1>, i32) + +define <8 x i64> @vrsub_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vrsub_vx_v8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v28, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; RV32-NEXT: vsub.vv v8, v28, v8, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vrsub_vx_v8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m4,ta,mu +; RV64-NEXT: vrsub.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <8 x i64> undef, i64 %b, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> undef, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.sub.v8i64(<8 x i64> %vb, <8 x i64> %va, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vrsub_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vrsub_vx_v8i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v28, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; RV32-NEXT: vsub.vv v8, v28, v8 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; 
RV64-LABEL: vrsub_vx_v8i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m4,ta,mu +; RV64-NEXT: vrsub.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <8 x i64> undef, i64 %b, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> undef, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> undef, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.sub.v8i64(<8 x i64> %vb, <8 x i64> %va, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vrsub_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i64> undef, i64 2, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> undef, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.sub.v8i64(<8 x i64> %vb, <8 x i64> %va, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vrsub_vi_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_v8i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i64> undef, i64 2, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> undef, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> undef, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.sub.v8i64(<8 x i64> %vb, <8 x i64> %va, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +declare <16 x i64> @llvm.vp.sub.v16i64(<16 x i64>, <16 x i64>, <16 x i1>, i32) + +define <16 x i64> @vrsub_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vrsub_vx_v16i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; RV32-NEXT: vsub.vv v8, v16, v8, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vrsub_vx_v16i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m8,ta,mu +; RV64-NEXT: vrsub.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <16 x i64> undef, i64 %b, i32 0 + %vb = shufflevector <16 x i64> %elt.head, <16 x i64> undef, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.sub.v16i64(<16 x i64> %vb, <16 x i64> %va, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vrsub_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vrsub_vx_v16i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; RV32-NEXT: vsub.vv v8, v16, v8 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vrsub_vx_v16i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m8,ta,mu +; RV64-NEXT: vrsub.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <16 x i64> undef, i64 %b, i32 0 + %vb = shufflevector <16 x i64> %elt.head, <16 x i64> undef, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> undef, i1 
true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.sub.v16i64(<16 x i64> %vb, <16 x i64> %va, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vrsub_vi_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_v16i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i64> undef, i64 2, i32 0
+ %vb = shufflevector <16 x i64> %elt.head, <16 x i64> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.sub.v16i64(<16 x i64> %vb, <16 x i64> %va, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vrsub_vi_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_v16i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i64> undef, i64 2, i32 0
+ %vb = shufflevector <16 x i64> %elt.head, <16 x i64> undef, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.sub.v16i64(<16 x i64> %vb, <16 x i64> %va, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsra-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsra-vp.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsra-vp.ll
@@ -0,0 +1,1333 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV64
+
+declare <2 x i8> @llvm.vp.ashr.v2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32)
+
+define <2 x i8> @vsra_vv_v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x i8> @llvm.vp.ashr.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vsra_vv_v2i8_unmasked(<2 x i8> %va, <2 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_v2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i8> @llvm.vp.ashr.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vsra_vx_v2i8(<2 x i8> %va, i8 %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i8> @llvm.vp.ashr.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vsra_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_v2i8_unmasked:
+; CHECK: # 
%bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> undef, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.ashr.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vsra_vi_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> undef, i8 5, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.ashr.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vsra_vi_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_v2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> undef, i8 5, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> undef, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.ashr.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +declare <4 x i8> @llvm.vp.ashr.v4i8(<4 x i8>, <4 x i8>, <4 x i1>, i32) + +define <4 x i8> @vsra_vv_v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <4 x i8> @llvm.vp.ashr.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vsra_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_v4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <4 x i1> undef, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.ashr.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vsra_vx_v4i8(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> undef, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.ashr.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vsra_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_v4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> undef, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> undef, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.ashr.v4i8(<4 x 
i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vsra_vi_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> undef, i8 5, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> undef, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.ashr.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vsra_vi_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_v4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> undef, i8 5, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> undef, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> undef, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.ashr.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +declare <8 x i8> @llvm.vp.ashr.v8i8(<8 x i8>, <8 x i8>, <8 x i1>, i32) + +define <8 x i8> @vsra_vv_v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <8 x i8> @llvm.vp.ashr.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vsra_vv_v8i8_unmasked(<8 x i8> %va, <8 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_v8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <8 x i1> undef, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.ashr.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vsra_vx_v8i8(<8 x i8> %va, i8 %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> undef, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.ashr.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vsra_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_v8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> undef, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> undef, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.ashr.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vsra_vi_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> undef, i8 5, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> undef, <8 x i32> 
zeroinitializer + %v = call <8 x i8> @llvm.vp.ashr.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vsra_vi_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_v8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> undef, i8 5, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> undef, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> undef, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.ashr.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +declare <16 x i8> @llvm.vp.ashr.v16i8(<16 x i8>, <16 x i8>, <16 x i1>, i32) + +define <16 x i8> @vsra_vv_v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <16 x i8> @llvm.vp.ashr.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vsra_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_v16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <16 x i1> undef, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.ashr.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vsra_vx_v16i8(<16 x i8> %va, i8 %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> undef, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.ashr.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vsra_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_v16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> undef, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> undef, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.ashr.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vsra_vi_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> undef, i8 5, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> undef, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.ashr.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vsra_vi_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_v16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5 +; 
CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> undef, i8 5, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> undef, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> undef, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.ashr.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +declare <2 x i16> @llvm.vp.ashr.v2i16(<2 x i16>, <2 x i16>, <2 x i1>, i32) + +define <2 x i16> @vsra_vv_v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <2 x i16> @llvm.vp.ashr.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vsra_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_v2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x i1> undef, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.ashr.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vsra_vx_v2i16(<2 x i16> %va, i16 %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> undef, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.ashr.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vsra_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_v2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> undef, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> undef, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.ashr.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vsra_vi_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> undef, i16 5, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> undef, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.ashr.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vsra_vi_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_v2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> undef, i16 5, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> undef, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> undef, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer + %v = call <2 x i16> 
@llvm.vp.ashr.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +declare <4 x i16> @llvm.vp.ashr.v4i16(<4 x i16>, <4 x i16>, <4 x i1>, i32) + +define <4 x i16> @vsra_vv_v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <4 x i16> @llvm.vp.ashr.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vsra_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_v4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <4 x i1> undef, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.ashr.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vsra_vx_v4i16(<4 x i16> %va, i16 %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> undef, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.ashr.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vsra_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_v4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> undef, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> undef, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.ashr.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vsra_vi_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> undef, i16 5, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> undef, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.ashr.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vsra_vi_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_v4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> undef, i16 5, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> undef, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> undef, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.ashr.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +declare <8 x i16> @llvm.vp.ashr.v8i16(<8 x i16>, <8 x i16>, <8 x i1>, i32) + +define <8 x i16> @vsra_vv_v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: 
vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <8 x i16> @llvm.vp.ashr.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vsra_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_v8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <8 x i1> undef, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.ashr.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vsra_vx_v8i16(<8 x i16> %va, i16 %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> undef, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.ashr.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vsra_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_v8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> undef, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> undef, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.ashr.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vsra_vi_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> undef, i16 5, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> undef, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.ashr.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vsra_vi_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_v8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> undef, i16 5, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> undef, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> undef, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.ashr.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +declare <16 x i16> @llvm.vp.ashr.v16i16(<16 x i16>, <16 x i16>, <16 x i1>, i32) + +define <16 x i16> @vsra_vv_v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <16 x i16> @llvm.vp.ashr.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vsra_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: 
vsra_vv_v16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <16 x i1> undef, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.ashr.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vsra_vx_v16i16(<16 x i16> %va, i16 %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> undef, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.ashr.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vsra_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_v16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> undef, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> undef, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.ashr.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vsra_vi_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> undef, i16 5, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> undef, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.ashr.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vsra_vi_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_v16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> undef, i16 5, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> undef, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> undef, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.ashr.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +declare <2 x i32> @llvm.vp.ashr.v2i32(<2 x i32>, <2 x i32>, <2 x i1>, i32) + +define <2 x i32> @vsra_vv_v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <2 x i32> @llvm.vp.ashr.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vsra_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_v2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x i1> undef, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> 
zeroinitializer + %v = call <2 x i32> @llvm.vp.ashr.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vsra_vx_v2i32(<2 x i32> %va, i32 %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> undef, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.ashr.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vsra_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_v2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> undef, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> undef, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.ashr.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vsra_vi_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> undef, i32 5, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> undef, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.ashr.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vsra_vi_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_v2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> undef, i32 5, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> undef, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> undef, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.ashr.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +declare <4 x i32> @llvm.vp.ashr.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32) + +define <4 x i32> @vsra_vv_v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <4 x i32> @llvm.vp.ashr.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vsra_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_v4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <4 x i1> undef, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.ashr.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vsra_vx_v4i32(<4 x i32> %va, i32 %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vsra.vx 
v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> undef, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.ashr.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vsra_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_v4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> undef, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> undef, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.ashr.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vsra_vi_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> undef, i32 5, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> undef, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.ashr.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vsra_vi_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_v4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> undef, i32 5, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> undef, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> undef, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.ashr.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +declare <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) + +define <8 x i32> @vsra_vv_v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vsra_vv_v8i32_unmasked(<8 x i32> %va, <8 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_v8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <8 x i1> undef, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vsra_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> undef, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> 
@vsra_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_v8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> undef, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> undef, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vsra_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> undef, i32 5, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> undef, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vsra_vi_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_v8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> undef, i32 5, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> undef, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> undef, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +declare <16 x i32> @llvm.vp.ashr.v16i32(<16 x i32>, <16 x i32>, <16 x i1>, i32) + +define <16 x i32> @vsra_vv_v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call <16 x i32> @llvm.vp.ashr.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vsra_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_v16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement <16 x i1> undef, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.ashr.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vsra_vx_v16i32(<16 x i32> %va, i32 %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m4,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> undef, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.ashr.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vsra_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_v16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m4,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> undef, i32 %b, i32 0 + %vb 
= shufflevector <16 x i32> %elt.head, <16 x i32> undef, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> undef, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.ashr.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vsra_vi_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> undef, i32 5, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> undef, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.ashr.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vsra_vi_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_v16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> undef, i32 5, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> undef, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> undef, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.ashr.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +declare <2 x i64> @llvm.vp.ashr.v2i64(<2 x i64>, <2 x i64>, <2 x i1>, i32) + +define <2 x i64> @vsra_vv_v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <2 x i64> @llvm.vp.ashr.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vsra_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_v2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x i1> undef, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.ashr.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vsra_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vsra_vx_v2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v25, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; RV32-NEXT: vsra.vv v8, v8, v25, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsra_vx_v2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; RV64-NEXT: vsra.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <2 x i64> undef, i64 %b, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> undef, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.ashr.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vsra_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vsra_vx_v2i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: 
.cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v25, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; RV32-NEXT: vsra.vv v8, v8, v25 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsra_vx_v2i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; RV64-NEXT: vsra.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <2 x i64> undef, i64 %b, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> undef, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> undef, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.ashr.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vsra_vi_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i64> undef, i64 5, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> undef, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.ashr.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vsra_vi_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_v2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i64> undef, i64 5, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> undef, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> undef, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.ashr.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +declare <4 x i64> @llvm.vp.ashr.v4i64(<4 x i64>, <4 x i64>, <4 x i1>, i32) + +define <4 x i64> @vsra_vv_v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <4 x i64> @llvm.vp.ashr.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vsra_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_v4i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <4 x i1> undef, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.ashr.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vsra_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vsra_vx_v4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v26, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; RV32-NEXT: vsra.vv v8, v8, v26, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsra_vx_v4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m2,ta,mu +; RV64-NEXT: vsra.vx 
v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <4 x i64> undef, i64 %b, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> undef, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.ashr.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vsra_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vsra_vx_v4i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v26, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; RV32-NEXT: vsra.vv v8, v8, v26 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsra_vx_v4i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m2,ta,mu +; RV64-NEXT: vsra.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <4 x i64> undef, i64 %b, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> undef, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> undef, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.ashr.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vsra_vi_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i64> undef, i64 5, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> undef, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.ashr.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vsra_vi_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_v4i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i64> undef, i64 5, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> undef, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> undef, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.ashr.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +declare <8 x i64> @llvm.vp.ashr.v8i64(<8 x i64>, <8 x i64>, <8 x i1>, i32) + +define <8 x i64> @vsra_vv_v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call <8 x i64> @llvm.vp.ashr.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vsra_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_v8i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement <8 x i1> undef, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.ashr.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vsra_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vsra_vx_v8i64: +; RV32: # 
%bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v28, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; RV32-NEXT: vsra.vv v8, v8, v28, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsra_vx_v8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m4,ta,mu +; RV64-NEXT: vsra.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <8 x i64> undef, i64 %b, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> undef, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.ashr.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vsra_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vsra_vx_v8i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v28, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; RV32-NEXT: vsra.vv v8, v8, v28 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsra_vx_v8i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m4,ta,mu +; RV64-NEXT: vsra.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <8 x i64> undef, i64 %b, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> undef, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> undef, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.ashr.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vsra_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i64> undef, i64 5, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> undef, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.ashr.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vsra_vi_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_v8i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i64> undef, i64 5, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> undef, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> undef, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.ashr.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +declare <16 x i64> @llvm.vp.ashr.v16i64(<16 x i64>, <16 x i64>, <16 x i1>, i32) + +define <16 x i64> @vsra_vv_v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_v16i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call <16 x i64> @llvm.vp.ashr.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vsra_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: 
vsra_vv_v16i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement <16 x i1> undef, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.ashr.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vsra_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vsra_vx_v16i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; RV32-NEXT: vsra.vv v8, v8, v16, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsra_vx_v16i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m8,ta,mu +; RV64-NEXT: vsra.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <16 x i64> undef, i64 %b, i32 0 + %vb = shufflevector <16 x i64> %elt.head, <16 x i64> undef, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.ashr.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vsra_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vsra_vx_v16i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; RV32-NEXT: vsra.vv v8, v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsra_vx_v16i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m8,ta,mu +; RV64-NEXT: vsra.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <16 x i64> undef, i64 %b, i32 0 + %vb = shufflevector <16 x i64> %elt.head, <16 x i64> undef, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> undef, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.ashr.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vsra_vi_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_v16i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i64> undef, i64 5, i32 0 + %vb = shufflevector <16 x i64> %elt.head, <16 x i64> undef, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.ashr.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vsra_vi_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_v16i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i64> undef, i64 5, i32 0 + %vb = shufflevector <16 x i64> %elt.head, <16 x i64> undef, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> undef, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.ashr.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + ret <16 
x i64> %v +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsub-vp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsub-vp.ll @@ -0,0 +1,917 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 + +declare <2 x i8> @llvm.vp.sub.v2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32) + +define <2 x i8> @vsub_vv_v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <2 x i8> @llvm.vp.sub.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vsub_vv_v2i8_unmasked(<2 x i8> %va, <2 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_v2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x i1> undef, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.sub.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vsub_vx_v2i8(<2 x i8> %va, i8 %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.sub.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vsub_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_v2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> undef, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.sub.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +declare <4 x i8> @llvm.vp.sub.v4i8(<4 x i8>, <4 x i8>, <4 x i1>, i32) + +define <4 x i8> @vsub_vv_v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <4 x i8> @llvm.vp.sub.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vsub_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_v4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <4 x i1> undef, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.sub.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> 
%m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vsub_vx_v4i8(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> undef, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.sub.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vsub_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_v4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> undef, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> undef, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.sub.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +declare <8 x i8> @llvm.vp.sub.v8i8(<8 x i8>, <8 x i8>, <8 x i1>, i32) + +define <8 x i8> @vsub_vv_v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <8 x i8> @llvm.vp.sub.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vsub_vv_v8i8_unmasked(<8 x i8> %va, <8 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_v8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <8 x i1> undef, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.sub.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vsub_vx_v8i8(<8 x i8> %va, i8 %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> undef, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.sub.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vsub_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_v8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> undef, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> undef, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.sub.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +declare <16 x i8> @llvm.vp.sub.v16i8(<16 x i8>, <16 x i8>, <16 x i1>, i32) + +define <16 x i8> @vsub_vv_v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <16 x i8> @llvm.vp.sub.v16i8(<16 x i8> 
%va, <16 x i8> %b, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vsub_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_v16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <16 x i1> undef, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.sub.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vsub_vx_v16i8(<16 x i8> %va, i8 %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> undef, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.sub.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vsub_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_v16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> undef, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> undef, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.sub.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +declare <2 x i16> @llvm.vp.sub.v2i16(<2 x i16>, <2 x i16>, <2 x i1>, i32) + +define <2 x i16> @vsub_vv_v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <2 x i16> @llvm.vp.sub.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vsub_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_v2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x i1> undef, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.sub.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vsub_vx_v2i16(<2 x i16> %va, i16 %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> undef, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.sub.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vsub_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_v2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> undef, <2 x i32> zeroinitializer + %head 
= insertelement <2 x i1> undef, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.sub.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +declare <4 x i16> @llvm.vp.sub.v4i16(<4 x i16>, <4 x i16>, <4 x i1>, i32) + +define <4 x i16> @vsub_vv_v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <4 x i16> @llvm.vp.sub.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vsub_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_v4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <4 x i1> undef, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.sub.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vsub_vx_v4i16(<4 x i16> %va, i16 %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> undef, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.sub.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vsub_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_v4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> undef, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> undef, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.sub.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +declare <8 x i16> @llvm.vp.sub.v8i16(<8 x i16>, <8 x i16>, <8 x i1>, i32) + +define <8 x i16> @vsub_vv_v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <8 x i16> @llvm.vp.sub.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vsub_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_v8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <8 x i1> undef, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.sub.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vsub_vx_v8i16(<8 x i16> %va, i16 %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> undef, 
i16 %b, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> undef, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.sub.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vsub_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_v8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> undef, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> undef, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.sub.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +declare <16 x i16> @llvm.vp.sub.v16i16(<16 x i16>, <16 x i16>, <16 x i1>, i32) + +define <16 x i16> @vsub_vv_v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <16 x i16> @llvm.vp.sub.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vsub_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_v16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <16 x i1> undef, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.sub.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vsub_vx_v16i16(<16 x i16> %va, i16 %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> undef, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.sub.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vsub_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_v16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> undef, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> undef, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.sub.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +declare <2 x i32> @llvm.vp.sub.v2i32(<2 x i32>, <2 x i32>, <2 x i1>, i32) + +define <2 x i32> @vsub_vv_v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <2 x i32> @llvm.vp.sub.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vsub_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: 
vsub_vv_v2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x i1> undef, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.sub.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vsub_vx_v2i32(<2 x i32> %va, i32 %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> undef, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.sub.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vsub_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_v2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> undef, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> undef, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.sub.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +declare <4 x i32> @llvm.vp.sub.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32) + +define <4 x i32> @vsub_vv_v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vsub_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_v4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <4 x i1> undef, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vsub_vx_v4i32(<4 x i32> %va, i32 %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> undef, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vsub_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_v4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> undef, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> undef, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> %va, 
<4 x i32> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +declare <8 x i32> @llvm.vp.sub.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) + +define <8 x i32> @vsub_vv_v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vsub_vv_v8i32_unmasked(<8 x i32> %va, <8 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_v8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <8 x i1> undef, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vsub_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> undef, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vsub_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_v8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> undef, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> undef, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +declare <16 x i32> @llvm.vp.sub.v16i32(<16 x i32>, <16 x i32>, <16 x i1>, i32) + +define <16 x i32> @vsub_vv_v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call <16 x i32> @llvm.vp.sub.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vsub_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_v16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement <16 x i1> undef, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.sub.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vsub_vx_v16i32(<16 x i32> %va, i32 %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m4,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> undef, <16 x i32> zeroinitializer + %v = call <16 x i32> 
@llvm.vp.sub.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vsub_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_v16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m4,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> undef, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> undef, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.sub.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +declare <2 x i64> @llvm.vp.sub.v2i64(<2 x i64>, <2 x i64>, <2 x i1>, i32) + +define <2 x i64> @vsub_vv_v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <2 x i64> @llvm.vp.sub.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vsub_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_v2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x i1> undef, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.sub.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vsub_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vsub_vx_v2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v25, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; RV32-NEXT: vsub.vv v8, v8, v25, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsub_vx_v2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; RV64-NEXT: vsub.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <2 x i64> undef, i64 %b, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> undef, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.sub.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vsub_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vsub_vx_v2i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v25, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; RV32-NEXT: vsub.vv v8, v8, v25 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsub_vx_v2i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; RV64-NEXT: vsub.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <2 x i64> undef, i64 %b, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> undef, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> undef, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer + %v = 
call <2 x i64> @llvm.vp.sub.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +declare <4 x i64> @llvm.vp.sub.v4i64(<4 x i64>, <4 x i64>, <4 x i1>, i32) + +define <4 x i64> @vsub_vv_v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <4 x i64> @llvm.vp.sub.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vsub_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_v4i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <4 x i1> undef, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.sub.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vsub_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vsub_vx_v4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v26, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; RV32-NEXT: vsub.vv v8, v8, v26, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsub_vx_v4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m2,ta,mu +; RV64-NEXT: vsub.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <4 x i64> undef, i64 %b, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> undef, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.sub.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vsub_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vsub_vx_v4i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v26, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; RV32-NEXT: vsub.vv v8, v8, v26 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsub_vx_v4i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m2,ta,mu +; RV64-NEXT: vsub.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <4 x i64> undef, i64 %b, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> undef, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> undef, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.sub.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +declare <8 x i64> @llvm.vp.sub.v8i64(<8 x i64>, <8 x i64>, <8 x i1>, i32) + +define <8 x i64> @vsub_vv_v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call <8 x i64> @llvm.vp.sub.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vsub_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zeroext %evl) { 
+; CHECK-LABEL: vsub_vv_v8i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement <8 x i1> undef, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.sub.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vsub_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vsub_vx_v8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v28, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; RV32-NEXT: vsub.vv v8, v8, v28, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsub_vx_v8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m4,ta,mu +; RV64-NEXT: vsub.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <8 x i64> undef, i64 %b, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> undef, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.sub.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vsub_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vsub_vx_v8i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v28, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; RV32-NEXT: vsub.vv v8, v8, v28 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsub_vx_v8i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m4,ta,mu +; RV64-NEXT: vsub.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <8 x i64> undef, i64 %b, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> undef, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> undef, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.sub.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +declare <16 x i64> @llvm.vp.sub.v16i64(<16 x i64>, <16 x i64>, <16 x i1>, i32) + +define <16 x i64> @vsub_vv_v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_v16i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call <16 x i64> @llvm.vp.sub.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vsub_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_v16i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement <16 x i1> undef, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.sub.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vsub_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vsub_vx_v16i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: 
.cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vsub_vx_v16i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a1, a1, e64,m8,ta,mu
+; RV64-NEXT:    vsub.vx v8, v8, a0, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <16 x i64> undef, i64 %b, i32 0
+  %vb = shufflevector <16 x i64> %elt.head, <16 x i64> undef, <16 x i32> zeroinitializer
+  %v = call <16 x i64> @llvm.vp.sub.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i64> %v
+}
+
+define <16 x i64> @vsub_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsub_vx_v16i64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; RV32-NEXT:    vsub.vv v8, v8, v16
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vsub_vx_v16i64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a1, a1, e64,m8,ta,mu
+; RV64-NEXT:    vsub.vx v8, v8, a0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <16 x i64> undef, i64 %b, i32 0
+  %vb = shufflevector <16 x i64> %elt.head, <16 x i64> undef, <16 x i32> zeroinitializer
+  %head = insertelement <16 x i1> undef, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+  %v = call <16 x i64> @llvm.vp.sub.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i64> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll
@@ -0,0 +1,1789 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
+
+declare <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i8> @vadd_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e8,mf8,ta,mu
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vadd_vv_nxv1i8_unmasked(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv1i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e8,mf8,ta,mu
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vadd_vx_nxv1i8(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, a1, e8,mf8,ta,mu
+; CHECK-NEXT:    vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i8> undef, i8 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vadd_vx_nxv1i8_unmasked(<vscale x 1 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_nxv1i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, a1, e8,mf8,ta,mu
+; CHECK-NEXT:    vadd.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i8> undef, i8 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+  %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vadd_vi_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e8,mf8,ta,mu
+; CHECK-NEXT:    vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i8> undef, i8 -1, i32 0
+  %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vadd_vi_nxv1i8_unmasked(<vscale x 1 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv1i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e8,mf8,ta,mu
+; CHECK-NEXT:    vadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i8> undef, i8 -1, i32 0
+  %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+  %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+declare <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i8> @vadd_vv_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e8,mf4,ta,mu
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vadd_vv_nxv2i8_unmasked(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv2i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e8,mf4,ta,mu
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vadd_vx_nxv2i8(<vscale x 2 x i8> %va, i8 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, a1, e8,mf4,ta,mu
+; CHECK-NEXT:    vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i8> undef, i8 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vadd_vx_nxv2i8_unmasked(<vscale x 2 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_nxv2i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, a1, e8,mf4,ta,mu
+; CHECK-NEXT:    vadd.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i8> undef, i8 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
+  %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vadd_vi_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e8,mf4,ta,mu
+; CHECK-NEXT:    vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i8> undef, i8 -1, i32 0
+  %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vadd_vi_nxv2i8_unmasked(<vscale x 2 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv2i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e8,mf4,ta,mu
+; CHECK-NEXT:    vadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i8> undef, i8 -1, i32 0
+  %vb = shufflevector
%elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv2i8( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.add.nxv4i8(, , , i32) + +define @vadd_vv_nxv4i8( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,mf2,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.add.nxv4i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vadd_vv_nxv4i8_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,mf2,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv4i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vadd_vx_nxv4i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,mf2,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv4i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vx_nxv4i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_nxv4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,mf2,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv4i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vi_nxv4i8( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,mf2,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 -1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv4i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vi_nxv4i8_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_nxv4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,mf2,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 -1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv4i8( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.add.nxv8i8(, , , i32) + +define @vadd_vv_nxv8i8( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.add.nxv8i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vadd_vv_nxv8i8_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv8i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vadd_vx_nxv8i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: 
vadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv8i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vx_nxv8i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_nxv8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv8i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vi_nxv8i8( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 -1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv8i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vi_nxv8i8_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_nxv8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 -1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv8i8( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.add.nxv16i8(, , , i32) + +define @vadd_vv_nxv16i8( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m2,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.add.nxv16i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vadd_vv_nxv16i8_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m2,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv16i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vadd_vx_nxv16i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m2,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv16i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vx_nxv16i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_nxv16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m2,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv16i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vi_nxv16i8( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m2,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 -1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv16i8( %va, %vb, %m, i32 
%evl) + ret %v +} + +define @vadd_vi_nxv16i8_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_nxv16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m2,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 -1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv16i8( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.add.nxv32i8(, , , i32) + +define @vadd_vv_nxv32i8( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m4,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.add.nxv32i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vadd_vv_nxv32i8_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv32i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m4,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv32i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vadd_vx_nxv32i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m4,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv32i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vx_nxv32i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_nxv32i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m4,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv32i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vi_nxv32i8( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m4,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 -1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv32i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vi_nxv32i8_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_nxv32i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m4,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 -1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv32i8( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.add.nxv64i8(, , , i32) + +define @vadd_vv_nxv64i8( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m8,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.add.nxv64i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vadd_vv_nxv64i8_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv64i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m8,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = 
insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv64i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vadd_vx_nxv64i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m8,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv64i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vx_nxv64i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_nxv64i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m8,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv64i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vi_nxv64i8( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m8,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 -1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv64i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vi_nxv64i8_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_nxv64i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m8,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 -1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv64i8( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.add.nxv1i16(, , , i32) + +define @vadd_vv_nxv1i16( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.add.nxv1i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vadd_vv_nxv1i16_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv1i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv1i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vadd_vx_nxv1i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,mf4,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv1i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vx_nxv1i16_unmasked( %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_nxv1i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,mf4,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv1i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define 
@vadd_vi_nxv1i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 -1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv1i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vi_nxv1i16_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_nxv1i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 -1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv1i16( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.add.nxv2i16(, , , i32) + +define @vadd_vv_nxv2i16( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.add.nxv2i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vadd_vv_nxv2i16_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv2i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vadd_vx_nxv2i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,mf2,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv2i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vx_nxv2i16_unmasked( %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,mf2,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv2i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vi_nxv2i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 -1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv2i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vi_nxv2i16_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 -1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv2i16( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.add.nxv4i16(, , , i32) + +define @vadd_vv_nxv4i16( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, 
a0, e16,m1,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.add.nxv4i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vadd_vv_nxv4i16_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv4i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vadd_vx_nxv4i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv4i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vx_nxv4i16_unmasked( %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv4i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vi_nxv4i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 -1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv4i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vi_nxv4i16_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 -1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv4i16( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.add.nxv8i16(, , , i32) + +define @vadd_vv_nxv8i16( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.add.nxv8i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vadd_vv_nxv8i16_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv8i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vadd_vx_nxv8i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv8i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vx_nxv8i16_unmasked( %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_nxv8i16_unmasked: +; CHECK: # %bb.0: 
+; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv8i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vi_nxv8i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 -1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv8i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vi_nxv8i16_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 -1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv8i16( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.add.nxv16i16(, , , i32) + +define @vadd_vv_nxv16i16( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m4,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.add.nxv16i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vadd_vv_nxv16i16_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m4,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv16i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vadd_vx_nxv16i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m4,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv16i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vx_nxv16i16_unmasked( %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m4,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv16i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vi_nxv16i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m4,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 -1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv16i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vi_nxv16i16_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m4,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 -1, i32 0 + %vb = 
shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv16i16( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.add.nxv32i16(, , , i32) + +define @vadd_vv_nxv32i16( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m8,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.add.nxv32i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vadd_vv_nxv32i16_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m8,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv32i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vadd_vx_nxv32i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m8,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv32i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vx_nxv32i16_unmasked( %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m8,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv32i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vi_nxv32i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m8,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 -1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv32i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vi_nxv32i16_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m8,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 -1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv32i16( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.add.nxv1i32(, , , i32) + +define @vadd_vv_nxv1i32( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.add.nxv1i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vadd_vv_nxv1i32_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv1i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vadd_vx_nxv1i32( %va, i32 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_nxv1i32: +; 
CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,mf2,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv1i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vx_nxv1i32_unmasked( %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,mf2,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv1i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vi_nxv1i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 -1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv1i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vi_nxv1i32_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 -1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv1i32( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.add.nxv2i32(, , , i32) + +define @vadd_vv_nxv2i32( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.add.nxv2i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vadd_vv_nxv2i32_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv2i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vadd_vx_nxv2i32( %va, i32 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv2i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vx_nxv2i32_unmasked( %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv2i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vi_nxv2i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 -1, i32 0 + 
%vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv2i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vi_nxv2i32_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 -1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv2i32( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.add.nxv4i32(, , , i32) + +define @vadd_vv_nxv4i32( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.add.nxv4i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vadd_vv_nxv4i32_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv4i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vadd_vx_nxv4i32( %va, i32 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv4i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vx_nxv4i32_unmasked( %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv4i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vi_nxv4i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 -1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv4i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vi_nxv4i32_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 -1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv4i32( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.add.nxv8i32(, , , i32) + +define @vadd_vv_nxv8i32( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.add.nxv8i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vadd_vv_nxv8i32_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv8i32_unmasked: +; 
CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv8i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vadd_vx_nxv8i32( %va, i32 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m4,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv8i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vx_nxv8i32_unmasked( %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m4,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv8i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vi_nxv8i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 -1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv8i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vi_nxv8i32_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 -1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv8i32( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.add.nxv16i32(, , , i32) + +define @vadd_vv_nxv16i32( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m8,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.add.nxv16i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vadd_vv_nxv16i32_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m8,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv16i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vadd_vx_nxv16i32( %va, i32 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m8,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv16i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vx_nxv16i32_unmasked( %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m8,ta,mu +; CHECK-NEXT: vadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, 
i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv16i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vi_nxv16i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m8,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 -1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv16i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vi_nxv16i32_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m8,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 -1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv16i32( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.add.nxv1i64(, , , i32) + +define @vadd_vv_nxv1i64( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.add.nxv1i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vadd_vv_nxv1i64_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv1i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv1i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vadd_vx_nxv1i64( %va, i64 %b, %m, i32 zeroext %evl) { +; RV32-LABEL: vadd_vx_nxv1i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v25, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; RV32-NEXT: vadd.vv v8, v8, v25, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vadd_vx_nxv1i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; RV64-NEXT: vadd.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv1i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vx_nxv1i64_unmasked( %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vadd_vx_nxv1i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v25, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; RV32-NEXT: vadd.vv v8, v8, v25 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vadd_vx_nxv1i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; RV64-NEXT: vadd.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv1i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vi_nxv1i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_nxv1i64: +; 
CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i64 -1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv1i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vi_nxv1i64_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_nxv1i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i64 -1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv1i64( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.add.nxv2i64(, , , i32) + +define @vadd_vv_nxv2i64( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.add.nxv2i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vadd_vv_nxv2i64_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv2i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vadd_vx_nxv2i64( %va, i64 %b, %m, i32 zeroext %evl) { +; RV32-LABEL: vadd_vx_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v26, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; RV32-NEXT: vadd.vv v8, v8, v26, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vadd_vx_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m2,ta,mu +; RV64-NEXT: vadd.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv2i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vx_nxv2i64_unmasked( %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vadd_vx_nxv2i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v26, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; RV32-NEXT: vadd.vv v8, v8, v26 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vadd_vx_nxv2i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m2,ta,mu +; RV64-NEXT: vadd.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv2i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vi_nxv2i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i64 -1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + 
%v = call @llvm.vp.add.nxv2i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vi_nxv2i64_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_nxv2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i64 -1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv2i64( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.add.nxv4i64(, , , i32) + +define @vadd_vv_nxv4i64( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.add.nxv4i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vadd_vv_nxv4i64_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv4i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv4i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vadd_vx_nxv4i64( %va, i64 %b, %m, i32 zeroext %evl) { +; RV32-LABEL: vadd_vx_nxv4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v28, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; RV32-NEXT: vadd.vv v8, v8, v28, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vadd_vx_nxv4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m4,ta,mu +; RV64-NEXT: vadd.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv4i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vx_nxv4i64_unmasked( %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vadd_vx_nxv4i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v28, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; RV32-NEXT: vadd.vv v8, v8, v28 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vadd_vx_nxv4i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m4,ta,mu +; RV64-NEXT: vadd.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv4i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vi_nxv4i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i64 -1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv4i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vi_nxv4i64_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_nxv4i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, 
e64,m4,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i64 -1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv4i64( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.add.nxv8i64(, , , i32) + +define @vadd_vv_nxv8i64( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.add.nxv8i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vadd_vv_nxv8i64_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vv_nxv8i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu +; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv8i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vadd_vx_nxv8i64( %va, i64 %b, %m, i32 zeroext %evl) { +; RV32-LABEL: vadd_vx_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vadd_vx_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m8,ta,mu +; RV64-NEXT: vadd.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv8i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vx_nxv8i64_unmasked( %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vadd_vx_nxv8i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vadd_vx_nxv8i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m8,ta,mu +; RV64-NEXT: vadd.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv8i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vi_nxv8i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i64 -1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv8i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vadd_vi_nxv8i64_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vi_nxv8i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu +; CHECK-NEXT: vadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i64 -1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = 
shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vrsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vrsub-vp.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vrsub-vp.ll
@@ -0,0 +1,1305 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV64
+
+declare <vscale x 1 x i8> @llvm.vp.sub.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i8> @vrsub_vx_nxv1i8(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_nxv1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,mf8,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+  %elt.head = insertelement <vscale x 1 x i8> undef, i8 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.vp.sub.nxv1i8(<vscale x 1 x i8> %vb, <vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vrsub_vx_nxv1i8_unmasked(<vscale x 1 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_nxv1i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,mf8,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+  %elt.head = insertelement <vscale x 1 x i8> undef, i8 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+  %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.vp.sub.nxv1i8(<vscale x 1 x i8> %vb, <vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vrsub_vi_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,mf8,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t
+; CHECK-NEXT: ret
+  %elt.head = insertelement <vscale x 1 x i8> undef, i8 2, i32 0
+  %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.vp.sub.nxv1i8(<vscale x 1 x i8> %vb, <vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vrsub_vi_nxv1i8_unmasked(<vscale x 1 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv1i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,mf8,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2
+; CHECK-NEXT: ret
+  %elt.head = insertelement <vscale x 1 x i8> undef, i8 2, i32 0
+  %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+  %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.vp.sub.nxv1i8(<vscale x 1 x i8> %vb, <vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+declare <vscale x 2 x i8> @llvm.vp.sub.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i8> @vrsub_vx_nxv2i8(<vscale x 2 x i8> %va, i8 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_nxv2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,mf4,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+  %elt.head = insertelement <vscale x 2 x i8> undef, i8 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i8> @llvm.vp.sub.nxv2i8(<vscale x 2 x i8> %vb, <vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vrsub_vx_nxv2i8_unmasked(<vscale x 2 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_nxv2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,mf4,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+  %elt.head = insertelement <vscale x 2 x i8> undef, i8 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
+  %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i8> @llvm.vp.sub.nxv2i8(<vscale x 2 x i8> %vb, <vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vrsub_vi_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,mf4,ta,mu
+; 
CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv2i8( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vi_nxv2i8_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_nxv2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,mf4,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv2i8( %vb, %va, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.sub.nxv4i8(, , , i32) + +define @vrsub_vx_nxv4i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,mf2,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv4i8( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vx_nxv4i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_nxv4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,mf2,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv4i8( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vi_nxv4i8( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,mf2,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv4i8( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vi_nxv4i8_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_nxv4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,mf2,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv4i8( %vb, %va, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.sub.nxv8i8(, , , i32) + +define @vrsub_vx_nxv8i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv8i8( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vx_nxv8i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_nxv8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv8i8( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vi_nxv8i8( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_nxv8i8: +; CHECK: 
# %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv8i8( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vi_nxv8i8_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_nxv8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv8i8( %vb, %va, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.sub.nxv16i8(, , , i32) + +define @vrsub_vx_nxv16i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m2,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv16i8( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vx_nxv16i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_nxv16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m2,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv16i8( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vi_nxv16i8( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m2,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv16i8( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vi_nxv16i8_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_nxv16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m2,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv16i8( %vb, %va, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.sub.nxv32i8(, , , i32) + +define @vrsub_vx_nxv32i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m4,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv32i8( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vx_nxv32i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_nxv32i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m4,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv32i8( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vi_nxv32i8( 
%va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m4,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv32i8( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vi_nxv32i8_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_nxv32i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m4,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv32i8( %vb, %va, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.sub.nxv64i8(, , , i32) + +define @vrsub_vx_nxv64i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m8,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv64i8( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vx_nxv64i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_nxv64i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m8,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv64i8( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vi_nxv64i8( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m8,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv64i8( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vi_nxv64i8_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_nxv64i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m8,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv64i8( %vb, %va, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.sub.nxv1i16(, , , i32) + +define @vrsub_vx_nxv1i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,mf4,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv1i16( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vx_nxv1i16_unmasked( %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_nxv1i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,mf4,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call 
@llvm.vp.sub.nxv1i16( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vi_nxv1i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv1i16( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vi_nxv1i16_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_nxv1i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv1i16( %vb, %va, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.sub.nxv2i16(, , , i32) + +define @vrsub_vx_nxv2i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,mf2,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv2i16( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vx_nxv2i16_unmasked( %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,mf2,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv2i16( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vi_nxv2i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv2i16( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vi_nxv2i16_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv2i16( %vb, %va, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.sub.nxv4i16(, , , i32) + +define @vrsub_vx_nxv4i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv4i16( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vx_nxv4i16_unmasked( %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head 
= insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv4i16( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vi_nxv4i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv4i16( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vi_nxv4i16_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv4i16( %vb, %va, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.sub.nxv8i16(, , , i32) + +define @vrsub_vx_nxv8i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv8i16( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vx_nxv8i16_unmasked( %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv8i16( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vi_nxv8i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv8i16( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vi_nxv8i16_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv8i16( %vb, %va, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.sub.nxv16i16(, , , i32) + +define @vrsub_vx_nxv16i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m4,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv16i16( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vx_nxv16i16_unmasked( %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m4,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head 
= insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv16i16( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vi_nxv16i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m4,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv16i16( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vi_nxv16i16_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m4,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv16i16( %vb, %va, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.sub.nxv32i16(, , , i32) + +define @vrsub_vx_nxv32i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m8,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv32i16( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vx_nxv32i16_unmasked( %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m8,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv32i16( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vi_nxv32i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m8,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv32i16( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vi_nxv32i16_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m8,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv32i16( %vb, %va, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.sub.nxv1i32(, , , i32) + +define @vrsub_vx_nxv1i32( %va, i32 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,mf2,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv1i32( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vx_nxv1i32_unmasked( %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_nxv1i32_unmasked: +; CHECK: # 
%bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,mf2,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv1i32( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vi_nxv1i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv1i32( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vi_nxv1i32_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv1i32( %vb, %va, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.sub.nxv2i32(, , , i32) + +define @vrsub_vx_nxv2i32( %va, i32 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv2i32( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vx_nxv2i32_unmasked( %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv2i32( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vi_nxv2i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv2i32( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vi_nxv2i32_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv2i32( %vb, %va, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.sub.nxv4i32(, , , i32) + +define @vrsub_vx_nxv4i32( %va, i32 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv4i32( %vb, %va, %m, i32 %evl) + ret %v +} + +define 
@vrsub_vx_nxv4i32_unmasked( %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv4i32( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vi_nxv4i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv4i32( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vi_nxv4i32_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv4i32( %vb, %va, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.sub.nxv8i32(, , , i32) + +define @vrsub_vx_nxv8i32( %va, i32 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m4,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv8i32( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vx_nxv8i32_unmasked( %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m4,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv8i32( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vi_nxv8i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv8i32( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vi_nxv8i32_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv8i32( %vb, %va, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.sub.nxv16i32(, , , i32) + +define @vrsub_vx_nxv16i32( %va, i32 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m8,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, 
undef, zeroinitializer + %v = call @llvm.vp.sub.nxv16i32( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vx_nxv16i32_unmasked( %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vx_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m8,ta,mu +; CHECK-NEXT: vrsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv16i32( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vi_nxv16i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m8,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv16i32( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vi_nxv16i32_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m8,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv16i32( %vb, %va, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.sub.nxv1i64(, , , i32) + +define @vrsub_vx_nxv1i64( %va, i64 %b, %m, i32 zeroext %evl) { +; RV32-LABEL: vrsub_vx_nxv1i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v25, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; RV32-NEXT: vsub.vv v8, v25, v8, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vrsub_vx_nxv1i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; RV64-NEXT: vrsub.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv1i64( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vx_nxv1i64_unmasked( %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vrsub_vx_nxv1i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v25, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; RV32-NEXT: vsub.vv v8, v25, v8 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vrsub_vx_nxv1i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; RV64-NEXT: vrsub.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv1i64( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vi_nxv1i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i64 2, i32 0 + %vb = shufflevector 
%elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv1i64( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vi_nxv1i64_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_nxv1i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i64 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv1i64( %vb, %va, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.sub.nxv2i64(, , , i32) + +define @vrsub_vx_nxv2i64( %va, i64 %b, %m, i32 zeroext %evl) { +; RV32-LABEL: vrsub_vx_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v26, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; RV32-NEXT: vsub.vv v8, v26, v8, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vrsub_vx_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m2,ta,mu +; RV64-NEXT: vrsub.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv2i64( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vx_nxv2i64_unmasked( %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vrsub_vx_nxv2i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v26, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; RV32-NEXT: vsub.vv v8, v26, v8 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vrsub_vx_nxv2i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m2,ta,mu +; RV64-NEXT: vrsub.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv2i64( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vi_nxv2i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i64 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv2i64( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vi_nxv2i64_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_nxv2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i64 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv2i64( %vb, %va, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.sub.nxv4i64(, , , i32) + +define @vrsub_vx_nxv4i64( %va, i64 %b, %m, i32 zeroext %evl) { +; RV32-LABEL: vrsub_vx_nxv4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: 
vsetvli a0, zero, e64,m4,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v28, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; RV32-NEXT: vsub.vv v8, v28, v8, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vrsub_vx_nxv4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m4,ta,mu +; RV64-NEXT: vrsub.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv4i64( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vx_nxv4i64_unmasked( %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vrsub_vx_nxv4i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v28, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; RV32-NEXT: vsub.vv v8, v28, v8 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vrsub_vx_nxv4i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m4,ta,mu +; RV64-NEXT: vrsub.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv4i64( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vi_nxv4i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i64 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv4i64( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vi_nxv4i64_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_nxv4i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i64 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv4i64( %vb, %va, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.sub.nxv8i64(, , , i32) + +define @vrsub_vx_nxv8i64( %va, i64 %b, %m, i32 zeroext %evl) { +; RV32-LABEL: vrsub_vx_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; RV32-NEXT: vsub.vv v8, v16, v8, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vrsub_vx_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m8,ta,mu +; RV64-NEXT: vrsub.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv8i64( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vx_nxv8i64_unmasked( %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vrsub_vx_nxv8i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; 
RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; RV32-NEXT: vsub.vv v8, v16, v8 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vrsub_vx_nxv8i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m8,ta,mu +; RV64-NEXT: vrsub.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv8i64( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vi_nxv8i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i64 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv8i64( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vrsub_vi_nxv8i64_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vrsub_vi_nxv8i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu +; CHECK-NEXT: vrsub.vi v8, v8, 2 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i64 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv8i64( %vb, %va, %m, i32 %evl) + ret %v +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll @@ -0,0 +1,1789 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 + +declare @llvm.vp.ashr.nxv1i8(, , , i32) + +define @vsra_vv_nxv1i8( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,mf8,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ashr.nxv1i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vv_nxv1i8_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_nxv1i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,mf8,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv1i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv1i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,mf8,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv1i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv1i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_nxv1i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,mf8,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call 
@llvm.vp.ashr.nxv1i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vi_nxv1i8( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,mf8,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv1i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vi_nxv1i8_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv1i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,mf8,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv1i8( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.ashr.nxv2i8(, , , i32) + +define @vsra_vv_nxv2i8( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,mf4,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ashr.nxv2i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vv_nxv2i8_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_nxv2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,mf4,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv2i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv2i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,mf4,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv2i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv2i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_nxv2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,mf4,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv2i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vi_nxv2i8( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,mf4,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv2i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vi_nxv2i8_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,mf4,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv2i8( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.ashr.nxv4i8(, , , i32) + +define @vsra_vv_nxv4i8( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_nxv4i8: +; CHECK: # %bb.0: +; 
CHECK-NEXT: vsetvli a0, a0, e8,mf2,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ashr.nxv4i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vv_nxv4i8_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_nxv4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,mf2,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv4i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv4i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,mf2,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv4i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv4i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_nxv4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,mf2,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv4i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vi_nxv4i8( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,mf2,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv4i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vi_nxv4i8_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,mf2,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv4i8( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.ashr.nxv8i8(, , , i32) + +define @vsra_vv_nxv8i8( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ashr.nxv8i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vv_nxv8i8_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_nxv8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv8i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv8i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv8i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv8i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_nxv8i8_unmasked: +; CHECK: # %bb.0: +; 
CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv8i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vi_nxv8i8( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv8i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vi_nxv8i8_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv8i8( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.ashr.nxv16i8(, , , i32) + +define @vsra_vv_nxv16i8( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m2,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ashr.nxv16i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vv_nxv16i8_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_nxv16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m2,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv16i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv16i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m2,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv16i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv16i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_nxv16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m2,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv16i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vi_nxv16i8( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m2,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv16i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vi_nxv16i8_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m2,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + 
%head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv16i8( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.ashr.nxv32i8(, , , i32) + +define @vsra_vv_nxv32i8( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m4,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ashr.nxv32i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vv_nxv32i8_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_nxv32i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m4,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv32i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv32i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m4,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv32i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv32i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_nxv32i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m4,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv32i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vi_nxv32i8( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m4,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv32i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vi_nxv32i8_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv32i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m4,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv32i8( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.ashr.nxv64i8(, , , i32) + +define @vsra_vv_nxv64i8( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m8,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ashr.nxv64i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vv_nxv64i8_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_nxv64i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m8,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv64i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv64i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m8,ta,mu +; CHECK-NEXT: vsra.vx 
v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv64i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv64i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_nxv64i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m8,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv64i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vi_nxv64i8( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m8,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv64i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vi_nxv64i8_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv64i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m8,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv64i8( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.ashr.nxv1i16(, , , i32) + +define @vsra_vv_nxv1i16( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ashr.nxv1i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vv_nxv1i16_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_nxv1i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv1i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv1i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,mf4,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv1i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv1i16_unmasked( %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_nxv1i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,mf4,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv1i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vi_nxv1i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv1i16( 
%va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vi_nxv1i16_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv1i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv1i16( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.ashr.nxv2i16(, , , i32) + +define @vsra_vv_nxv2i16( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ashr.nxv2i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vv_nxv2i16_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv2i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv2i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,mf2,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv2i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv2i16_unmasked( %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,mf2,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv2i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vi_nxv2i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv2i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vi_nxv2i16_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv2i16( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.ashr.nxv4i16(, , , i32) + +define @vsra_vv_nxv4i16( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ashr.nxv4i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vv_nxv4i16_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vsra.vv 
v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv4i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv4i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv4i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv4i16_unmasked( %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv4i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vi_nxv4i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv4i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vi_nxv4i16_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv4i16( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.ashr.nxv8i16(, , , i32) + +define @vsra_vv_nxv8i16( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ashr.nxv8i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vv_nxv8i16_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv8i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv8i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv8i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv8i16_unmasked( %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv8i16( 
%va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vi_nxv8i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv8i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vi_nxv8i16_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv8i16( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.ashr.nxv16i16(, , , i32) + +define @vsra_vv_nxv16i16( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m4,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ashr.nxv16i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vv_nxv16i16_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m4,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv16i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv16i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m4,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv16i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv16i16_unmasked( %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m4,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv16i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vi_nxv16i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m4,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv16i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vi_nxv16i16_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m4,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv16i16( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.ashr.nxv32i16(, , , i32) + +define @vsra_vv_nxv32i16( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: 
vsra_vv_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m8,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ashr.nxv32i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vv_nxv32i16_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m8,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv32i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv32i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m8,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv32i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv32i16_unmasked( %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m8,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv32i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vi_nxv32i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m8,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv32i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vi_nxv32i16_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m8,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv32i16( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.ashr.nxv1i32(, , , i32) + +define @vsra_vv_nxv1i32( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ashr.nxv1i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vv_nxv1i32_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv1i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv1i32( %va, i32 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,mf2,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv1i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv1i32_unmasked( %va, 
i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,mf2,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv1i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vi_nxv1i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv1i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vi_nxv1i32_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv1i32( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.ashr.nxv2i32(, , , i32) + +define @vsra_vv_nxv2i32( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ashr.nxv2i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vv_nxv2i32_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv2i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv2i32( %va, i32 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv2i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv2i32_unmasked( %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv2i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vi_nxv2i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv2i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vi_nxv2i32_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5 +; 
CHECK-NEXT: ret + %elt.head = insertelement undef, i32 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv2i32( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.ashr.nxv4i32(, , , i32) + +define @vsra_vv_nxv4i32( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ashr.nxv4i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vv_nxv4i32_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv4i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv4i32( %va, i32 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv4i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv4i32_unmasked( %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv4i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vi_nxv4i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv4i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vi_nxv4i32_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv4i32( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.ashr.nxv8i32(, , , i32) + +define @vsra_vv_nxv8i32( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ashr.nxv8i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vv_nxv8i32_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv8i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv8i32( %va, i32 %b, %m, 
i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m4,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv8i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv8i32_unmasked( %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m4,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv8i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vi_nxv8i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv8i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vi_nxv8i32_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv8i32( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.ashr.nxv16i32(, , , i32) + +define @vsra_vv_nxv16i32( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m8,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ashr.nxv16i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vv_nxv16i32_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m8,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv16i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv16i32( %va, i32 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m8,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv16i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv16i32_unmasked( %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vx_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m8,ta,mu +; CHECK-NEXT: vsra.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv16i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vi_nxv16i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m8,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5, 
v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv16i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vi_nxv16i32_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m8,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv16i32( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.ashr.nxv1i64(, , , i32) + +define @vsra_vv_nxv1i64( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ashr.nxv1i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vv_nxv1i64_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_nxv1i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv1i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv1i64( %va, i64 %b, %m, i32 zeroext %evl) { +; RV32-LABEL: vsra_vx_nxv1i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v25, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; RV32-NEXT: vsra.vv v8, v8, v25, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsra_vx_nxv1i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; RV64-NEXT: vsra.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv1i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv1i64_unmasked( %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vsra_vx_nxv1i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v25, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; RV32-NEXT: vsra.vv v8, v8, v25 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsra_vx_nxv1i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; RV64-NEXT: vsra.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv1i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vi_nxv1i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i64 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv1i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define 
@vsra_vi_nxv1i64_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv1i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i64 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv1i64( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.ashr.nxv2i64(, , , i32) + +define @vsra_vv_nxv2i64( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ashr.nxv2i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vv_nxv2i64_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_nxv2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv2i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv2i64( %va, i64 %b, %m, i32 zeroext %evl) { +; RV32-LABEL: vsra_vx_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v26, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; RV32-NEXT: vsra.vv v8, v8, v26, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsra_vx_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m2,ta,mu +; RV64-NEXT: vsra.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv2i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv2i64_unmasked( %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vsra_vx_nxv2i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v26, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; RV32-NEXT: vsra.vv v8, v8, v26 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsra_vx_nxv2i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m2,ta,mu +; RV64-NEXT: vsra.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv2i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vi_nxv2i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i64 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv2i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vi_nxv2i64_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = 
insertelement undef, i64 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv2i64( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.ashr.nxv4i64(, , , i32) + +define @vsra_vv_nxv4i64( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ashr.nxv4i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vv_nxv4i64_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_nxv4i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv4i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv4i64( %va, i64 %b, %m, i32 zeroext %evl) { +; RV32-LABEL: vsra_vx_nxv4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v28, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; RV32-NEXT: vsra.vv v8, v8, v28, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsra_vx_nxv4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m4,ta,mu +; RV64-NEXT: vsra.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv4i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv4i64_unmasked( %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vsra_vx_nxv4i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v28, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; RV32-NEXT: vsra.vv v8, v8, v28 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsra_vx_nxv4i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m4,ta,mu +; RV64-NEXT: vsra.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv4i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vi_nxv4i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i64 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv4i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vi_nxv4i64_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv4i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i64 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv4i64( 
%va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.ashr.nxv8i64(, , , i32) + +define @vsra_vv_nxv8i64( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.ashr.nxv8i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vv_nxv8i64_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vv_nxv8i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu +; CHECK-NEXT: vsra.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv8i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv8i64( %va, i64 %b, %m, i32 zeroext %evl) { +; RV32-LABEL: vsra_vx_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; RV32-NEXT: vsra.vv v8, v8, v16, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsra_vx_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m8,ta,mu +; RV64-NEXT: vsra.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv8i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vx_nxv8i64_unmasked( %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vsra_vx_nxv8i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; RV32-NEXT: vsra.vv v8, v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsra_vx_nxv8i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m8,ta,mu +; RV64-NEXT: vsra.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv8i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vi_nxv8i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i64 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv8i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsra_vi_nxv8i64_unmasked( %va, i32 zeroext %evl) { +; CHECK-LABEL: vsra_vi_nxv8i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu +; CHECK-NEXT: vsra.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i64 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.ashr.nxv8i64( %va, %vb, %m, i32 %evl) + ret %v +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsub-vp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vsub-vp.ll @@ -0,0 +1,1217 
@@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
+
+declare <vscale x 1 x i8> @llvm.vp.sub.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i8> @vsub_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,mf8,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+  %v = call <vscale x 1 x i8> @llvm.vp.sub.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vsub_vv_nxv1i8_unmasked(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv1i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,mf8,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: ret
+  %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.vp.sub.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vsub_vx_nxv1i8(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_nxv1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,mf8,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+  %elt.head = insertelement <vscale x 1 x i8> undef, i8 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.vp.sub.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vsub_vx_nxv1i8_unmasked(<vscale x 1 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_nxv1i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,mf8,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+  %elt.head = insertelement <vscale x 1 x i8> undef, i8 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+  %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.vp.sub.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+declare <vscale x 2 x i8> @llvm.vp.sub.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i8> @vsub_vv_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,mf4,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+  %v = call <vscale x 2 x i8> @llvm.vp.sub.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vsub_vv_nxv2i8_unmasked(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,mf4,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: ret
+  %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i8> @llvm.vp.sub.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vsub_vx_nxv2i8(<vscale x 2 x i8> %va, i8 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_nxv2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,mf4,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+  %elt.head = insertelement <vscale x 2 x i8> undef, i8 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i8> @llvm.vp.sub.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vsub_vx_nxv2i8_unmasked(<vscale x 2 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_nxv2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,mf4,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+  %elt.head = insertelement <vscale x 2 x i8> undef, i8 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
+  %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i8> @llvm.vp.sub.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i8> %v
+}
+
+declare
@llvm.vp.sub.nxv4i8(, , , i32) + +define @vsub_vv_nxv4i8( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,mf2,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.sub.nxv4i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsub_vv_nxv4i8_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,mf2,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv4i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsub_vx_nxv4i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,mf2,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv4i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsub_vx_nxv4i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_nxv4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,mf2,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv4i8( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.sub.nxv8i8(, , , i32) + +define @vsub_vv_nxv8i8( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.sub.nxv8i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsub_vv_nxv8i8_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv8i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsub_vx_nxv8i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv8i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsub_vx_nxv8i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_nxv8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv8i8( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.sub.nxv16i8(, , , i32) + +define @vsub_vv_nxv16i8( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m2,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.sub.nxv16i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsub_vv_nxv16i8_unmasked( %va, %b, i32 
zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m2,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv16i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsub_vx_nxv16i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m2,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv16i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsub_vx_nxv16i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_nxv16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m2,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv16i8( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.sub.nxv32i8(, , , i32) + +define @vsub_vv_nxv32i8( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m4,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.sub.nxv32i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsub_vv_nxv32i8_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv32i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m4,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv32i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsub_vx_nxv32i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m4,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv32i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsub_vx_nxv32i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_nxv32i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m4,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv32i8( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.sub.nxv64i8(, , , i32) + +define @vsub_vv_nxv64i8( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m8,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.sub.nxv64i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsub_vv_nxv64i8_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv64i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8,m8,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv64i8( %va, %b, %m, i32 %evl) + ret %v +} 
+ +define @vsub_vx_nxv64i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m8,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv64i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsub_vx_nxv64i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_nxv64i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e8,m8,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv64i8( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.sub.nxv1i16(, , , i32) + +define @vsub_vv_nxv1i16( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.sub.nxv1i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsub_vv_nxv1i16_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv1i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv1i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsub_vx_nxv1i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,mf4,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv1i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsub_vx_nxv1i16_unmasked( %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_nxv1i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,mf4,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv1i16( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.sub.nxv2i16(, , , i32) + +define @vsub_vv_nxv2i16( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.sub.nxv2i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsub_vv_nxv2i16_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv2i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsub_vx_nxv2i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,mf2,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + 
%v = call @llvm.vp.sub.nxv2i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsub_vx_nxv2i16_unmasked( %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,mf2,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv2i16( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.sub.nxv4i16(, , , i32) + +define @vsub_vv_nxv4i16( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.sub.nxv4i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsub_vv_nxv4i16_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv4i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsub_vx_nxv4i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv4i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsub_vx_nxv4i16_unmasked( %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv4i16( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.sub.nxv8i16(, , , i32) + +define @vsub_vv_nxv8i16( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.sub.nxv8i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsub_vv_nxv8i16_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv8i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsub_vx_nxv8i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv8i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsub_vx_nxv8i16_unmasked( %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement 
undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv8i16( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.sub.nxv16i16(, , , i32) + +define @vsub_vv_nxv16i16( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m4,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.sub.nxv16i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsub_vv_nxv16i16_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m4,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv16i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsub_vx_nxv16i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m4,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv16i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsub_vx_nxv16i16_unmasked( %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m4,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv16i16( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.sub.nxv32i16(, , , i32) + +define @vsub_vv_nxv32i16( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m8,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.sub.nxv32i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsub_vv_nxv32i16_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e16,m8,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv32i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsub_vx_nxv32i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m8,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv32i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsub_vx_nxv32i16_unmasked( %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e16,m8,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv32i16( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.sub.nxv1i32(, , , i32) + +define 
@vsub_vv_nxv1i32( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.sub.nxv1i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsub_vv_nxv1i32_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv1i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsub_vx_nxv1i32( %va, i32 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,mf2,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv1i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsub_vx_nxv1i32_unmasked( %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,mf2,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv1i32( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.sub.nxv2i32(, , , i32) + +define @vsub_vv_nxv2i32( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.sub.nxv2i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsub_vv_nxv2i32_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv2i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsub_vx_nxv2i32( %va, i32 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv2i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsub_vx_nxv2i32_unmasked( %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv2i32( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.sub.nxv4i32(, , , i32) + +define @vsub_vv_nxv4i32( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.sub.nxv4i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsub_vv_nxv4i32_unmasked( %va, %b, 
i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv4i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsub_vx_nxv4i32( %va, i32 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv4i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsub_vx_nxv4i32_unmasked( %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv4i32( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.sub.nxv8i32(, , , i32) + +define @vsub_vv_nxv8i32( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.sub.nxv8i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsub_vv_nxv8i32_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv8i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsub_vx_nxv8i32( %va, i32 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m4,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv8i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsub_vx_nxv8i32_unmasked( %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m4,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv8i32( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.sub.nxv16i32(, , , i32) + +define @vsub_vv_nxv16i32( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m8,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.sub.nxv16i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsub_vv_nxv16i32_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e32,m8,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv16i32( %va, 
%b, %m, i32 %evl) + ret %v +} + +define @vsub_vx_nxv16i32( %va, i32 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m8,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv16i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsub_vx_nxv16i32_unmasked( %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vx_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, a1, e32,m8,ta,mu +; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv16i32( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.sub.nxv1i64(, , , i32) + +define @vsub_vv_nxv1i64( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.sub.nxv1i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsub_vv_nxv1i64_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv1i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv1i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsub_vx_nxv1i64( %va, i64 %b, %m, i32 zeroext %evl) { +; RV32-LABEL: vsub_vx_nxv1i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v25, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; RV32-NEXT: vsub.vv v8, v8, v25, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsub_vx_nxv1i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; RV64-NEXT: vsub.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv1i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsub_vx_nxv1i64_unmasked( %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vsub_vx_nxv1i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v25, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; RV32-NEXT: vsub.vv v8, v8, v25 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsub_vx_nxv1i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; RV64-NEXT: vsub.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv1i64( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.sub.nxv2i64(, , , i32) + +define @vsub_vv_nxv2i64( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: 
vsetvli a0, a0, e64,m2,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.sub.nxv2i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsub_vv_nxv2i64_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv2i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsub_vx_nxv2i64( %va, i64 %b, %m, i32 zeroext %evl) { +; RV32-LABEL: vsub_vx_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v26, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; RV32-NEXT: vsub.vv v8, v8, v26, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsub_vx_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m2,ta,mu +; RV64-NEXT: vsub.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv2i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsub_vx_nxv2i64_unmasked( %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vsub_vx_nxv2i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v26, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu +; RV32-NEXT: vsub.vv v8, v8, v26 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsub_vx_nxv2i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m2,ta,mu +; RV64-NEXT: vsub.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv2i64( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.sub.nxv4i64(, , , i32) + +define @vsub_vv_nxv4i64( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.sub.nxv4i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsub_vv_nxv4i64_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv4i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv4i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsub_vx_nxv4i64( %va, i64 %b, %m, i32 zeroext %evl) { +; RV32-LABEL: vsub_vx_nxv4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v28, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; RV32-NEXT: vsub.vv v8, v8, v28, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsub_vx_nxv4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli 
a1, a1, e64,m4,ta,mu +; RV64-NEXT: vsub.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv4i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsub_vx_nxv4i64_unmasked( %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vsub_vx_nxv4i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v28, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu +; RV32-NEXT: vsub.vv v8, v8, v28 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsub_vx_nxv4i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m4,ta,mu +; RV64-NEXT: vsub.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv4i64( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.sub.nxv8i64(, , , i32) + +define @vsub_vv_nxv8i64( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.sub.nxv8i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsub_vv_nxv8i64_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vsub_vv_nxv8i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu +; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv8i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vsub_vx_nxv8i64( %va, i64 %b, %m, i32 zeroext %evl) { +; RV32-LABEL: vsub_vx_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; RV32-NEXT: vsub.vv v8, v8, v16, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsub_vx_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m8,ta,mu +; RV64-NEXT: vsub.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.sub.nxv8i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vsub_vx_nxv8i64_unmasked( %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vsub_vx_nxv8i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli a0, a2, e64,m8,ta,mu +; RV32-NEXT: vsub.vv v8, v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsub_vx_nxv8i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, a1, e64,m8,ta,mu +; RV64-NEXT: vsub.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v 
= call <vscale x 8 x i64> @llvm.vp.sub.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}