diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -253,6 +253,10 @@ // Vector select with an additional VL operand. This operation is unmasked. VSELECT_VL, + // Vector select with operand #2 (the value when the condition is false) tied + // to the destination and an additional VL operand. This operation is + // unmasked. + VP_MERGE_VL, // Mask binary operators. VMAND_VL, diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -521,12 +521,13 @@ ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN, - ISD::VP_SELECT}; + ISD::VP_MERGE, ISD::VP_SELECT}; static const unsigned FloatingPointVPOps[] = { ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL, ISD::VP_FDIV, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD, - ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SELECT}; + ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE, + ISD::VP_SELECT}; if (!Subtarget.is64Bit()) { // We must custom-lower certain vXi64 operations on RV32 due to the vector @@ -3441,6 +3442,8 @@ return lowerSET_ROUNDING(Op, DAG); case ISD::VP_SELECT: return lowerVPOp(Op, DAG, RISCVISD::VSELECT_VL); + case ISD::VP_MERGE: + return lowerVPOp(Op, DAG, RISCVISD::VP_MERGE_VL); case ISD::VP_ADD: return lowerVPOp(Op, DAG, RISCVISD::ADD_VL); case ISD::VP_SUB: @@ -10087,6 +10090,7 @@ NODE_NAME_CASE(VWADDU_VL) NODE_NAME_CASE(SETCC_VL) NODE_NAME_CASE(VSELECT_VL) + NODE_NAME_CASE(VP_MERGE_VL) NODE_NAME_CASE(VMAND_VL) NODE_NAME_CASE(VMOR_VL) NODE_NAME_CASE(VMXOR_VL) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -579,10 +579,11 @@ !subst("_B64", "", !subst("_MASK", "", !subst("_TIED", "", + !subst("_TU", "", !subst("F16", "F", !subst("F32", "F", !subst("F64", "F", - !subst("Pseudo", "", PseudoInst)))))))))))))))))))); + !subst("Pseudo", "", PseudoInst))))))))))))))))))))); } // The destination vector register group for a masked vector instruction cannot @@ -928,6 +929,9 @@ let BaseInstr = !cast(PseudoToVInst.VInst); } +// Special version of VPseudoBinaryNoMask where we pretend the first source is +// tied to the destination. +// This allows maskedoff and rs2 to be the same register. class VPseudoTiedBinaryNoMask : @@ -1079,6 +1083,30 @@ let VLMul = MInfo.value; } +class VPseudoTiedBinaryCarryIn : + Pseudo<(outs RetClass:$rd), + !if(CarryIn, + (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, VMV0:$carry, AVL:$vl, + ixlenimm:$sew), + (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew)), []>, + RISCVVPseudo { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasMergeOp = 1; + let HasVecPolicyOp = 0; + let BaseInstr = !cast(PseudoToVInst.VInst); + let VLMul = MInfo.value; +} + class VPseudoTernaryNoMask; } +multiclass VPseudoTiedBinaryV_VM { + foreach m = MxList in + def "_VV" # !if(CarryIn, "M", "") # "_" # m.MX # "_TU" : + VPseudoTiedBinaryCarryIn.R, m.vrclass)), + m.vrclass, m.vrclass, m, CarryIn, Constraint>; +} + multiclass VPseudoBinaryV_XM { foreach m = MxList in @@ -1751,13 +1789,29 @@ m.vrclass, GPR, m, CarryIn, Constraint>; } +multiclass VPseudoTiedBinaryV_XM { + foreach m = MxList in + def "_VX" # !if(CarryIn, "M", "") # "_" # m.MX # "_TU": + VPseudoTiedBinaryCarryIn.R, m.vrclass)), + m.vrclass, GPR, m, CarryIn, Constraint>; +} + multiclass VPseudoVMRG_FM { foreach f = FPList in - foreach m = f.MxList in + foreach m = f.MxList in { def "_V" # f.FX # "M_" # m.MX : VPseudoBinaryCarryIn.R, m.vrclass, f.fprclass, m, /*CarryIn=*/1, "">, Sched<[WriteVFMergeV, ReadVFMergeV, ReadVFMergeF, ReadVMask]>; + // Tied version to allow codegen control over the tail elements + def "_V" # f.FX # "M_" # m.MX # "_TU": + VPseudoTiedBinaryCarryIn.R, + m.vrclass, f.fprclass, m, /*CarryIn=*/1, "">, + Sched<[WriteVFMergeV, ReadVFMergeV, ReadVFMergeF, ReadVMask]>; + } } multiclass VPseudoBinaryV_IM; } +multiclass VPseudoTiedBinaryV_IM { + foreach m = MxList in + def "_VI" # !if(CarryIn, "M", "") # "_" # m.MX # "_TU": + VPseudoTiedBinaryCarryIn.R, m.vrclass)), + m.vrclass, simm5, m, CarryIn, Constraint>; +} + multiclass VPseudoUnaryVMV_V_X_I { foreach m = MxList in { let VLMul = m.value in { @@ -2104,6 +2168,13 @@ Sched<[WriteVIMergeX, ReadVIMergeV, ReadVIMergeX, ReadVMask]>; defm "" : VPseudoBinaryV_IM, Sched<[WriteVIMergeI, ReadVIMergeV, ReadVMask]>; + // Tied versions to allow codegen control over the tail elements + defm "" : VPseudoTiedBinaryV_VM, + Sched<[WriteVIMergeV, ReadVIMergeV, ReadVIMergeV, ReadVMask]>; + defm "" : VPseudoTiedBinaryV_XM, + Sched<[WriteVIMergeX, ReadVIMergeV, ReadVIMergeX, ReadVMask]>; + defm "" : VPseudoTiedBinaryV_IM, + Sched<[WriteVIMergeI, ReadVIMergeV, ReadVMask]>; } multiclass VPseudoVCALU_VM_XM_IM { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -177,14 +177,13 @@ SDTCisSameNumEltsAs<0, 3>, SDTCisVT<4, XLenVT>]>>; -def riscv_vselect_vl : SDNode<"RISCVISD::VSELECT_VL", - SDTypeProfile<1, 4, [SDTCisVec<0>, - SDTCisVec<1>, - SDTCisSameNumEltsAs<0, 1>, - SDTCVecEltisVT<1, i1>, - SDTCisSameAs<0, 2>, - SDTCisSameAs<2, 3>, - SDTCisVT<4, XLenVT>]>>; +def SDT_RISCVSelect_VL : SDTypeProfile<1, 4, [ + SDTCisVec<0>, SDTCisVec<1>, SDTCisSameNumEltsAs<0, 1>, SDTCVecEltisVT<1, i1>, + SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>, SDTCisVT<4, XLenVT> +]>; + +def riscv_vselect_vl : SDNode<"RISCVISD::VSELECT_VL", SDT_RISCVSelect_VL>; +def riscv_vp_merge_vl : SDNode<"RISCVISD::VP_MERGE_VL", SDT_RISCVSelect_VL>; def SDT_RISCVMaskBinOp_VL : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, @@ -976,6 +975,30 @@ VLOpFrag)), (!cast("PseudoVMERGE_VIM_"#vti.LMul.MX) vti.RegClass:$rs2, simm5:$rs1, (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; + + def : Pat<(vti.Vector (riscv_vp_merge_vl (vti.Mask V0), + vti.RegClass:$rs1, + vti.RegClass:$rs2, + VLOpFrag)), + (!cast("PseudoVMERGE_VVM_"#vti.LMul.MX#"_TU") + vti.RegClass:$rs2, vti.RegClass:$rs2, vti.RegClass:$rs1, + (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; + + def : Pat<(vti.Vector (riscv_vp_merge_vl (vti.Mask V0), + (SplatPat XLenVT:$rs1), + vti.RegClass:$rs2, + VLOpFrag)), + (!cast("PseudoVMERGE_VXM_"#vti.LMul.MX#"_TU") + vti.RegClass:$rs2, vti.RegClass:$rs2, GPR:$rs1, + (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; + + def : Pat<(vti.Vector (riscv_vp_merge_vl (vti.Mask V0), + (SplatPat_simm5 simm5:$rs1), + vti.RegClass:$rs2, + VLOpFrag)), + (!cast("PseudoVMERGE_VIM_"#vti.LMul.MX#"_TU") + vti.RegClass:$rs2, vti.RegClass:$rs2, simm5:$rs1, + (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; } // 12.16. Vector Integer Move Instructions @@ -1223,6 +1246,31 @@ (!cast("PseudoVMERGE_VIM_"#fvti.LMul.MX) fvti.RegClass:$rs2, 0, (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>; + def : Pat<(fvti.Vector (riscv_vp_merge_vl (fvti.Mask V0), + fvti.RegClass:$rs1, + fvti.RegClass:$rs2, + VLOpFrag)), + (!cast("PseudoVMERGE_VVM_"#fvti.LMul.MX#"_TU") + fvti.RegClass:$rs2, fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask V0), + GPR:$vl, fvti.Log2SEW)>; + + def : Pat<(fvti.Vector (riscv_vp_merge_vl (fvti.Mask V0), + (SplatFPOp fvti.ScalarRegClass:$rs1), + fvti.RegClass:$rs2, + VLOpFrag)), + (!cast("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX#"_TU") + fvti.RegClass:$rs2, fvti.RegClass:$rs2, + (fvti.Scalar fvti.ScalarRegClass:$rs1), + (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>; + + def : Pat<(fvti.Vector (riscv_vp_merge_vl (fvti.Mask V0), + (SplatFPOp (fvti.Scalar fpimm0)), + fvti.RegClass:$rs2, + VLOpFrag)), + (!cast("PseudoVMERGE_VIM_"#fvti.LMul.MX#"_TU") + fvti.RegClass:$rs2, fvti.RegClass:$rs2, 0, (fvti.Mask V0), + GPR:$vl, fvti.Log2SEW)>; + // 14.16. Vector Floating-Point Move Instruction // If we're splatting fpimm0, use vmv.v.x vd, x0. def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll @@ -0,0 +1,953 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 + +declare <2 x i8> @llvm.vp.merge.v2i8(<2 x i1>, <2 x i8>, <2 x i8>, i32) + +define <2 x i8> @vpmerge_vv_v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <2 x i8> @llvm.vp.merge.v2i8(<2 x i1> %m, <2 x i8> %va, <2 x i8> %vb, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vpmerge_vx_v2i8(i8 %a, <2 x i8> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> poison, i8 %a, i32 0 + %va = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.merge.v2i8(<2 x i1> %m, <2 x i8> %va, <2 x i8> %vb, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vpmerge_vi_v2i8(<2 x i8> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> poison, i8 2, i32 0 + %va = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.merge.v2i8(<2 x i1> %m, <2 x i8> %va, <2 x i8> %vb, i32 %evl) + ret <2 x i8> %v +} + +declare <4 x i8> @llvm.vp.merge.v4i8(<4 x i1>, <4 x i8>, <4 x i8>, i32) + +define <4 x i8> @vpmerge_vv_v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <4 x i8> @llvm.vp.merge.v4i8(<4 x i1> %m, <4 x i8> %va, <4 x i8> %vb, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vpmerge_vx_v4i8(i8 %a, <4 x i8> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> poison, i8 %a, i32 0 + %va = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.merge.v4i8(<4 x i1> %m, <4 x i8> %va, <4 x i8> %vb, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vpmerge_vi_v4i8(<4 x i8> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> poison, i8 2, i32 0 + %va = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.merge.v4i8(<4 x i1> %m, <4 x i8> %va, <4 x i8> %vb, i32 %evl) + ret <4 x i8> %v +} + +declare <8 x i8> @llvm.vp.merge.v8i8(<8 x i1>, <8 x i8>, <8 x i8>, i32) + +define <8 x i8> @vpmerge_vv_v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <8 x i8> @llvm.vp.merge.v8i8(<8 x i1> %m, <8 x i8> %va, <8 x i8> %vb, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vpmerge_vx_v8i8(i8 %a, <8 x i8> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> poison, i8 %a, i32 0 + %va = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.merge.v8i8(<8 x i1> %m, <8 x i8> %va, <8 x i8> %vb, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vpmerge_vi_v8i8(<8 x i8> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> poison, i8 2, i32 0 + %va = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.merge.v8i8(<8 x i1> %m, <8 x i8> %va, <8 x i8> %vb, i32 %evl) + ret <8 x i8> %v +} + +declare <16 x i8> @llvm.vp.merge.v16i8(<16 x i1>, <16 x i8>, <16 x i8>, i32) + +define <16 x i8> @vpmerge_vv_v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <16 x i8> @llvm.vp.merge.v16i8(<16 x i1> %m, <16 x i8> %va, <16 x i8> %vb, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vpmerge_vx_v16i8(i8 %a, <16 x i8> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> poison, i8 %a, i32 0 + %va = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.merge.v16i8(<16 x i1> %m, <16 x i8> %va, <16 x i8> %vb, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vpmerge_vi_v16i8(<16 x i8> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> poison, i8 2, i32 0 + %va = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.merge.v16i8(<16 x i1> %m, <16 x i8> %va, <16 x i8> %vb, i32 %evl) + ret <16 x i8> %v +} + +declare <2 x i16> @llvm.vp.merge.v2i16(<2 x i1>, <2 x i16>, <2 x i16>, i32) + +define <2 x i16> @vpmerge_vv_v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <2 x i16> @llvm.vp.merge.v2i16(<2 x i1> %m, <2 x i16> %va, <2 x i16> %vb, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vpmerge_vx_v2i16(i16 %a, <2 x i16> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> poison, i16 %a, i32 0 + %va = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.merge.v2i16(<2 x i1> %m, <2 x i16> %va, <2 x i16> %vb, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vpmerge_vi_v2i16(<2 x i16> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> poison, i16 2, i32 0 + %va = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.merge.v2i16(<2 x i1> %m, <2 x i16> %va, <2 x i16> %vb, i32 %evl) + ret <2 x i16> %v +} + +declare <4 x i16> @llvm.vp.merge.v4i16(<4 x i1>, <4 x i16>, <4 x i16>, i32) + +define <4 x i16> @vpmerge_vv_v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <4 x i16> @llvm.vp.merge.v4i16(<4 x i1> %m, <4 x i16> %va, <4 x i16> %vb, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vpmerge_vx_v4i16(i16 %a, <4 x i16> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> poison, i16 %a, i32 0 + %va = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.merge.v4i16(<4 x i1> %m, <4 x i16> %va, <4 x i16> %vb, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vpmerge_vi_v4i16(<4 x i16> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> poison, i16 2, i32 0 + %va = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.merge.v4i16(<4 x i1> %m, <4 x i16> %va, <4 x i16> %vb, i32 %evl) + ret <4 x i16> %v +} + +declare <8 x i16> @llvm.vp.merge.v8i16(<8 x i1>, <8 x i16>, <8 x i16>, i32) + +define <8 x i16> @vpmerge_vv_v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <8 x i16> @llvm.vp.merge.v8i16(<8 x i1> %m, <8 x i16> %va, <8 x i16> %vb, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vpmerge_vx_v8i16(i16 %a, <8 x i16> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> poison, i16 %a, i32 0 + %va = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.merge.v8i16(<8 x i1> %m, <8 x i16> %va, <8 x i16> %vb, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vpmerge_vi_v8i16(<8 x i16> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> poison, i16 2, i32 0 + %va = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.merge.v8i16(<8 x i1> %m, <8 x i16> %va, <8 x i16> %vb, i32 %evl) + ret <8 x i16> %v +} + +declare <16 x i16> @llvm.vp.merge.v16i16(<16 x i1>, <16 x i16>, <16 x i16>, i32) + +define <16 x i16> @vpmerge_vv_v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %v = call <16 x i16> @llvm.vp.merge.v16i16(<16 x i1> %m, <16 x i16> %va, <16 x i16> %vb, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vpmerge_vx_v16i16(i16 %a, <16 x i16> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> poison, i16 %a, i32 0 + %va = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.merge.v16i16(<16 x i1> %m, <16 x i16> %va, <16 x i16> %vb, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vpmerge_vi_v16i16(<16 x i16> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> poison, i16 2, i32 0 + %va = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.merge.v16i16(<16 x i1> %m, <16 x i16> %va, <16 x i16> %vb, i32 %evl) + ret <16 x i16> %v +} + +declare <2 x i32> @llvm.vp.merge.v2i32(<2 x i1>, <2 x i32>, <2 x i32>, i32) + +define <2 x i32> @vpmerge_vv_v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <2 x i32> @llvm.vp.merge.v2i32(<2 x i1> %m, <2 x i32> %va, <2 x i32> %vb, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vpmerge_vx_v2i32(i32 %a, <2 x i32> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> poison, i32 %a, i32 0 + %va = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.merge.v2i32(<2 x i1> %m, <2 x i32> %va, <2 x i32> %vb, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vpmerge_vi_v2i32(<2 x i32> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> poison, i32 2, i32 0 + %va = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.merge.v2i32(<2 x i1> %m, <2 x i32> %va, <2 x i32> %vb, i32 %evl) + ret <2 x i32> %v +} + +declare <4 x i32> @llvm.vp.merge.v4i32(<4 x i1>, <4 x i32>, <4 x i32>, i32) + +define <4 x i32> @vpmerge_vv_v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <4 x i32> @llvm.vp.merge.v4i32(<4 x i1> %m, <4 x i32> %va, <4 x i32> %vb, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vpmerge_vx_v4i32(i32 %a, <4 x i32> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> poison, i32 %a, i32 0 + %va = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.merge.v4i32(<4 x i1> %m, <4 x i32> %va, <4 x i32> %vb, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vpmerge_vi_v4i32(<4 x i32> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> poison, i32 2, i32 0 + %va = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.merge.v4i32(<4 x i1> %m, <4 x i32> %va, <4 x i32> %vb, i32 %evl) + ret <4 x i32> %v +} + +declare <8 x i32> @llvm.vp.merge.v8i32(<8 x i1>, <8 x i32>, <8 x i32>, i32) + +define <8 x i32> @vpmerge_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %v = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %m, <8 x i32> %va, <8 x i32> %vb, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vpmerge_vx_v8i32(i32 %a, <8 x i32> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> poison, i32 %a, i32 0 + %va = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %m, <8 x i32> %va, <8 x i32> %vb, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vpmerge_vi_v8i32(<8 x i32> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> poison, i32 2, i32 0 + %va = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %m, <8 x i32> %va, <8 x i32> %vb, i32 %evl) + ret <8 x i32> %v +} + +declare <16 x i32> @llvm.vp.merge.v16i32(<16 x i1>, <16 x i32>, <16 x i32>, i32) + +define <16 x i32> @vpmerge_vv_v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %v = call <16 x i32> @llvm.vp.merge.v16i32(<16 x i1> %m, <16 x i32> %va, <16 x i32> %vb, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vpmerge_vx_v16i32(i32 %a, <16 x i32> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> poison, i32 %a, i32 0 + %va = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.merge.v16i32(<16 x i1> %m, <16 x i32> %va, <16 x i32> %vb, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vpmerge_vi_v16i32(<16 x i32> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> poison, i32 2, i32 0 + %va = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.merge.v16i32(<16 x i1> %m, <16 x i32> %va, <16 x i32> %vb, i32 %evl) + ret <16 x i32> %v +} + +declare <2 x i64> @llvm.vp.merge.v2i64(<2 x i1>, <2 x i64>, <2 x i64>, i32) + +define <2 x i64> @vpmerge_vv_v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <2 x i64> @llvm.vp.merge.v2i64(<2 x i1> %m, <2 x i64> %va, <2 x i64> %vb, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vpmerge_vx_v2i64(i64 %a, <2 x i64> %vb, <2 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vpmerge_vx_v2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, mu +; RV32-NEXT: vmerge.vvm v8, v8, v9, v0 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vpmerge_vx_v2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, mu +; RV64-NEXT: vmerge.vxm v8, v8, a0, v0 +; RV64-NEXT: ret + %elt.head = insertelement <2 x i64> poison, i64 %a, i32 0 + %va = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.merge.v2i64(<2 x i1> %m, <2 x i64> %va, <2 x i64> %vb, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vpmerge_vi_v2i64(<2 x i64> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i64> poison, i64 2, i32 0 + %va = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.merge.v2i64(<2 x i1> %m, <2 x i64> %va, <2 x i64> %vb, i32 %evl) + ret <2 x i64> %v +} + +declare <4 x i64> @llvm.vp.merge.v4i64(<4 x i1>, <4 x i64>, <4 x i64>, i32) + +define <4 x i64> @vpmerge_vv_v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %v = call <4 x i64> @llvm.vp.merge.v4i64(<4 x i1> %m, <4 x i64> %va, <4 x i64> %vb, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vpmerge_vx_v4i64(i64 %a, <4 x i64> %vb, <4 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vpmerge_vx_v4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, mu +; RV32-NEXT: vmerge.vvm v8, v8, v10, v0 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vpmerge_vx_v4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, mu +; RV64-NEXT: vmerge.vxm v8, v8, a0, v0 +; RV64-NEXT: ret + %elt.head = insertelement <4 x i64> poison, i64 %a, i32 0 + %va = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.merge.v4i64(<4 x i1> %m, <4 x i64> %va, <4 x i64> %vb, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vpmerge_vi_v4i64(<4 x i64> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i64> poison, i64 2, i32 0 + %va = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.merge.v4i64(<4 x i1> %m, <4 x i64> %va, <4 x i64> %vb, i32 %evl) + ret <4 x i64> %v +} + +declare <8 x i64> @llvm.vp.merge.v8i64(<8 x i1>, <8 x i64>, <8 x i64>, i32) + +define <8 x i64> @vpmerge_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %v = call <8 x i64> @llvm.vp.merge.v8i64(<8 x i1> %m, <8 x i64> %va, <8 x i64> %vb, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vpmerge_vx_v8i64(i64 %a, <8 x i64> %vb, <8 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vpmerge_vx_v8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, mu +; RV32-NEXT: vmerge.vvm v8, v8, v12, v0 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vpmerge_vx_v8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, tu, mu +; RV64-NEXT: vmerge.vxm v8, v8, a0, v0 +; RV64-NEXT: ret + %elt.head = insertelement <8 x i64> poison, i64 %a, i32 0 + %va = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.merge.v8i64(<8 x i1> %m, <8 x i64> %va, <8 x i64> %vb, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vpmerge_vi_v8i64(<8 x i64> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i64> poison, i64 2, i32 0 + %va = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.merge.v8i64(<8 x i1> %m, <8 x i64> %va, <8 x i64> %vb, i32 %evl) + ret <8 x i64> %v +} + +declare <16 x i64> @llvm.vp.merge.v16i64(<16 x i1>, <16 x i64>, <16 x i64>, i32) + +define <16 x i64> @vpmerge_vv_v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v16i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %v = call <16 x i64> @llvm.vp.merge.v16i64(<16 x i1> %m, <16 x i64> %va, <16 x i64> %vb, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vpmerge_vx_v16i64(i64 %a, <16 x i64> %vb, <16 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vpmerge_vx_v16i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, tu, mu +; RV32-NEXT: vmerge.vvm v8, v8, v16, v0 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vpmerge_vx_v16i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m8, tu, mu +; RV64-NEXT: vmerge.vxm v8, v8, a0, v0 +; RV64-NEXT: ret + %elt.head = insertelement <16 x i64> poison, i64 %a, i32 0 + %va = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.merge.v16i64(<16 x i1> %m, <16 x i64> %va, <16 x i64> %vb, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vpmerge_vi_v16i64(<16 x i64> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v16i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i64> poison, i64 2, i32 0 + %va = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.merge.v16i64(<16 x i1> %m, <16 x i64> %va, <16 x i64> %vb, i32 %evl) + ret <16 x i64> %v +} + +declare <2 x half> @llvm.vp.merge.v2f16(<2 x i1>, <2 x half>, <2 x half>, i32) + +define <2 x half> @vpmerge_vv_v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %m, <2 x half> %va, <2 x half> %vb, i32 %evl) + ret <2 x half> %v +} + +define <2 x half> @vpmerge_vf_v2f16(half %a, <2 x half> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_v2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x half> poison, half %a, i32 0 + %va = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer + %v = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %m, <2 x half> %va, <2 x half> %vb, i32 %evl) + ret <2 x half> %v +} + +declare <4 x half> @llvm.vp.merge.v4f16(<4 x i1>, <4 x half>, <4 x half>, i32) + +define <4 x half> @vpmerge_vv_v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %m, <4 x half> %va, <4 x half> %vb, i32 %evl) + ret <4 x half> %v +} + +define <4 x half> @vpmerge_vf_v4f16(half %a, <4 x half> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_v4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x half> poison, half %a, i32 0 + %va = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer + %v = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %m, <4 x half> %va, <4 x half> %vb, i32 %evl) + ret <4 x half> %v +} + +declare <8 x half> @llvm.vp.merge.v8f16(<8 x i1>, <8 x half>, <8 x half>, i32) + +define <8 x half> @vpmerge_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %m, <8 x half> %va, <8 x half> %vb, i32 %evl) + ret <8 x half> %v +} + +define <8 x half> @vpmerge_vf_v8f16(half %a, <8 x half> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x half> poison, half %a, i32 0 + %va = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer + %v = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %m, <8 x half> %va, <8 x half> %vb, i32 %evl) + ret <8 x half> %v +} + +declare <16 x half> @llvm.vp.merge.v16f16(<16 x i1>, <16 x half>, <16 x half>, i32) + +define <16 x half> @vpmerge_vv_v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %v = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %m, <16 x half> %va, <16 x half> %vb, i32 %evl) + ret <16 x half> %v +} + +define <16 x half> @vpmerge_vf_v16f16(half %a, <16 x half> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_v16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x half> poison, half %a, i32 0 + %va = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer + %v = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %m, <16 x half> %va, <16 x half> %vb, i32 %evl) + ret <16 x half> %v +} + +declare <2 x float> @llvm.vp.merge.v2f32(<2 x i1>, <2 x float>, <2 x float>, i32) + +define <2 x float> @vpmerge_vv_v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %m, <2 x float> %va, <2 x float> %vb, i32 %evl) + ret <2 x float> %v +} + +define <2 x float> @vpmerge_vf_v2f32(float %a, <2 x float> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_v2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x float> poison, float %a, i32 0 + %va = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer + %v = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %m, <2 x float> %va, <2 x float> %vb, i32 %evl) + ret <2 x float> %v +} + +declare <4 x float> @llvm.vp.merge.v4f32(<4 x i1>, <4 x float>, <4 x float>, i32) + +define <4 x float> @vpmerge_vv_v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %m, <4 x float> %va, <4 x float> %vb, i32 %evl) + ret <4 x float> %v +} + +define <4 x float> @vpmerge_vf_v4f32(float %a, <4 x float> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x float> poison, float %a, i32 0 + %va = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer + %v = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %m, <4 x float> %va, <4 x float> %vb, i32 %evl) + ret <4 x float> %v +} + +declare <8 x float> @llvm.vp.merge.v8f32(<8 x i1>, <8 x float>, <8 x float>, i32) + +define <8 x float> @vpmerge_vv_v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %v = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %va, <8 x float> %vb, i32 %evl) + ret <8 x float> %v +} + +define <8 x float> @vpmerge_vf_v8f32(float %a, <8 x float> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x float> poison, float %a, i32 0 + %va = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer + %v = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %va, <8 x float> %vb, i32 %evl) + ret <8 x float> %v +} + +declare <16 x float> @llvm.vp.merge.v16f32(<16 x i1>, <16 x float>, <16 x float>, i32) + +define <16 x float> @vpmerge_vv_v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %v = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %m, <16 x float> %va, <16 x float> %vb, i32 %evl) + ret <16 x float> %v +} + +define <16 x float> @vpmerge_vf_v16f32(float %a, <16 x float> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_v16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x float> poison, float %a, i32 0 + %va = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer + %v = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %m, <16 x float> %va, <16 x float> %vb, i32 %evl) + ret <16 x float> %v +} + +declare <2 x double> @llvm.vp.merge.v2f64(<2 x i1>, <2 x double>, <2 x double>, i32) + +define <2 x double> @vpmerge_vv_v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %m, <2 x double> %va, <2 x double> %vb, i32 %evl) + ret <2 x double> %v +} + +define <2 x double> @vpmerge_vf_v2f64(double %a, <2 x double> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x double> poison, double %a, i32 0 + %va = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer + %v = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %m, <2 x double> %va, <2 x double> %vb, i32 %evl) + ret <2 x double> %v +} + +declare <4 x double> @llvm.vp.merge.v4f64(<4 x i1>, <4 x double>, <4 x double>, i32) + +define <4 x double> @vpmerge_vv_v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %v = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %m, <4 x double> %va, <4 x double> %vb, i32 %evl) + ret <4 x double> %v +} + +define <4 x double> @vpmerge_vf_v4f64(double %a, <4 x double> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x double> poison, double %a, i32 0 + %va = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer + %v = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %m, <4 x double> %va, <4 x double> %vb, i32 %evl) + ret <4 x double> %v +} + +declare <8 x double> @llvm.vp.merge.v8f64(<8 x i1>, <8 x double>, <8 x double>, i32) + +define <8 x double> @vpmerge_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %v = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %m, <8 x double> %va, <8 x double> %vb, i32 %evl) + ret <8 x double> %v +} + +define <8 x double> @vpmerge_vf_v8f64(double %a, <8 x double> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x double> poison, double %a, i32 0 + %va = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer + %v = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %m, <8 x double> %va, <8 x double> %vb, i32 %evl) + ret <8 x double> %v +} + +declare <16 x double> @llvm.vp.merge.v16f64(<16 x i1>, <16 x double>, <16 x double>, i32) + +define <16 x double> @vpmerge_vv_v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v16f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %v = call <16 x double> @llvm.vp.merge.v16f64(<16 x i1> %m, <16 x double> %va, <16 x double> %vb, i32 %evl) + ret <16 x double> %v +} + +define <16 x double> @vpmerge_vf_v16f64(double %a, <16 x double> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_v16f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x double> poison, double %a, i32 0 + %va = shufflevector <16 x double> %elt.head, <16 x double> poison, <16 x i32> zeroinitializer + %v = call <16 x double> @llvm.vp.merge.v16f64(<16 x i1> %m, <16 x double> %va, <16 x double> %vb, i32 %evl) + ret <16 x double> %v +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll @@ -0,0 +1,1280 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 + +declare @llvm.vp.merge.nxv1i8(, , , i32) + +define @vpmerge_vv_nxv1i8( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv1i8( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv1i8(i8 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv1i8( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv1i8( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv1i8( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv2i8(, , , i32) + +define @vpmerge_vv_nxv2i8( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv2i8( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv2i8(i8 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv2i8( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv2i8( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv2i8( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv4i8(, , , i32) + +define @vpmerge_vv_nxv4i8( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv4i8( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv4i8(i8 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv4i8( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv4i8( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv4i8( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv8i8(, , , i32) + +define @vpmerge_vv_nxv8i8( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv8i8( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv8i8(i8 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv8i8( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv8i8( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv8i8( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv16i8(, , , i32) + +define @vpmerge_vv_nxv16i8( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv16i8( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv16i8(i8 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m2, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv16i8( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv16i8( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv16i8( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv32i8(, , , i32) + +define @vpmerge_vv_nxv32i8( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, mu +; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv32i8( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv32i8(i8 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m4, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv32i8( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv32i8( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv32i8( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv64i8(, , , i32) + +define @vpmerge_vv_nxv64i8( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, mu +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv64i8( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv64i8(i8 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv64i8( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv64i8( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv64i8( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv1i16(, , , i32) + +define @vpmerge_vv_nxv1i16( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv1i16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv1i16(i16 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv1i16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv1i16( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv1i16( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv2i16(, , , i32) + +define @vpmerge_vv_nxv2i16( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv2i16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv2i16(i16 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv2i16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv2i16( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv2i16( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv4i16(, , , i32) + +define @vpmerge_vv_nxv4i16( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv4i16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv4i16(i16 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv4i16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv4i16( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv4i16( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv8i16(, , , i32) + +define @vpmerge_vv_nxv8i16( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv8i16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv8i16(i16 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv8i16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv8i16( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv8i16( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv16i16(, , , i32) + +define @vpmerge_vv_nxv16i16( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv16i16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv16i16(i16 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv16i16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv16i16( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv16i16( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv32i16(, , , i32) + +define @vpmerge_vv_nxv32i16( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv32i16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv32i16(i16 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv32i16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv32i16( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv32i16( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv1i32(, , , i32) + +define @vpmerge_vv_nxv1i32( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv1i32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv1i32(i32 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv1i32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv1i32( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv1i32( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv2i32(, , , i32) + +define @vpmerge_vv_nxv2i32( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv2i32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv2i32(i32 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv2i32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv2i32( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv2i32( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv4i32(, , , i32) + +define @vpmerge_vv_nxv4i32( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv4i32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv4i32(i32 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv4i32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv4i32( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv4i32( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv8i32(, , , i32) + +define @vpmerge_vv_nxv8i32( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv8i32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv8i32(i32 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv8i32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv8i32( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv8i32( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv16i32(, , , i32) + +define @vpmerge_vv_nxv16i32( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv16i32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv16i32(i32 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv16i32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv16i32( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv16i32( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv1i64(, , , i32) + +define @vpmerge_vv_nxv1i64( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv1i64( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv1i64(i64 %a, %vb, %m, i32 zeroext %evl) { +; RV32-LABEL: vpmerge_vx_nxv1i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, mu +; RV32-NEXT: vmerge.vvm v8, v8, v9, v0 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vpmerge_vx_nxv1i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, mu +; RV64-NEXT: vmerge.vxm v8, v8, a0, v0 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv1i64( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv1i64( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i64 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv1i64( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv2i64(, , , i32) + +define @vpmerge_vv_nxv2i64( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv2i64( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv2i64(i64 %a, %vb, %m, i32 zeroext %evl) { +; RV32-LABEL: vpmerge_vx_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, mu +; RV32-NEXT: vmerge.vvm v8, v8, v10, v0 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vpmerge_vx_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, mu +; RV64-NEXT: vmerge.vxm v8, v8, a0, v0 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv2i64( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv2i64( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i64 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv2i64( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv4i64(, , , i32) + +define @vpmerge_vv_nxv4i64( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv4i64( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv4i64(i64 %a, %vb, %m, i32 zeroext %evl) { +; RV32-LABEL: vpmerge_vx_nxv4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, mu +; RV32-NEXT: vmerge.vvm v8, v8, v12, v0 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vpmerge_vx_nxv4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, tu, mu +; RV64-NEXT: vmerge.vxm v8, v8, a0, v0 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv4i64( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv4i64( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i64 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv4i64( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv8i64(, , , i32) + +define @vpmerge_vv_nxv8i64( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv8i64( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv8i64(i64 %a, %vb, %m, i32 zeroext %evl) { +; RV32-LABEL: vpmerge_vx_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, tu, mu +; RV32-NEXT: vmerge.vvm v8, v8, v16, v0 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vpmerge_vx_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m8, tu, mu +; RV64-NEXT: vmerge.vxm v8, v8, a0, v0 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv8i64( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv8i64( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i64 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv8i64( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv1f16(, , , i32) + +define @vpmerge_vv_nxv1f16( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv1f16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv1f16(half %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, mu +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv1f16( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv2f16(, , , i32) + +define @vpmerge_vv_nxv2f16( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv2f16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv2f16(half %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, mu +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv2f16( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv4f16(, , , i32) + +define @vpmerge_vv_nxv4f16( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv4f16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv4f16(half %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv4f16( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv8f16(, , , i32) + +define @vpmerge_vv_nxv8f16( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv8f16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv8f16(half %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, mu +; CHECK-NEXT: vfmv.v.f v10, fa0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv8f16( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv16f16(, , , i32) + +define @vpmerge_vv_nxv16f16( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv16f16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv16f16(half %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, mu +; CHECK-NEXT: vfmv.v.f v12, fa0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv16f16( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv32f16(, , , i32) + +define @vpmerge_vv_nxv32f16( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv32f16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv32f16(half %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, mu +; CHECK-NEXT: vfmv.v.f v16, fa0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv32f16( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv1f32(, , , i32) + +define @vpmerge_vv_nxv1f32( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv1f32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv1f32(float %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, mu +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv1f32( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv2f32(, , , i32) + +define @vpmerge_vv_nxv2f32( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv2f32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv2f32(float %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv2f32( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv4f32(, , , i32) + +define @vpmerge_vv_nxv4f32( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv4f32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv4f32(float %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, mu +; CHECK-NEXT: vfmv.v.f v10, fa0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv4f32( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv8f32(, , , i32) + +define @vpmerge_vv_nxv8f32( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv8f32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv8f32(float %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; CHECK-NEXT: vfmv.v.f v12, fa0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv8f32( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv16f32(, , , i32) + +define @vpmerge_vv_nxv16f32( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv16f32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv16f32(float %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, mu +; CHECK-NEXT: vfmv.v.f v16, fa0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv16f32( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv1f64(, , , i32) + +define @vpmerge_vv_nxv1f64( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv1f64( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv1f64(double %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv1f64( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv2f64(, , , i32) + +define @vpmerge_vv_nxv2f64( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv2f64( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv2f64(double %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, mu +; CHECK-NEXT: vfmv.v.f v10, fa0 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv2f64( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv4f64(, , , i32) + +define @vpmerge_vv_nxv4f64( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv4f64( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv4f64(double %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, mu +; CHECK-NEXT: vfmv.v.f v12, fa0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv4f64( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv8f64(, , , i32) + +define @vpmerge_vv_nxv8f64( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv8f64( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv8f64(double %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, mu +; CHECK-NEXT: vfmv.v.f v16, fa0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv8f64( %m, %va, %vb, i32 %evl) + ret %v +}