diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -327,6 +327,34 @@ return N->hasOneUse(); }]>; +def riscv_vfmadd_vl_oneuse : PatFrag<(ops node:$A, node:$B, node:$C, node:$D, + node:$E), + (riscv_vfmadd_vl node:$A, node:$B, + node:$C, node:$D, node:$E), [{ + return N->hasOneUse(); +}]>; + +def riscv_vfnmadd_vl_oneuse : PatFrag<(ops node:$A, node:$B, node:$C, node:$D, + node:$E), + (riscv_vfnmadd_vl node:$A, node:$B, + node:$C, node:$D, node:$E), [{ + return N->hasOneUse(); +}]>; + +def riscv_vfmsub_vl_oneuse : PatFrag<(ops node:$A, node:$B, node:$C, node:$D, + node:$E), + (riscv_vfmsub_vl node:$A, node:$B, + node:$C, node:$D, node:$E), [{ + return N->hasOneUse(); +}]>; + +def riscv_vfnmsub_vl_oneuse : PatFrag<(ops node:$A, node:$B, node:$C, node:$D, + node:$E), + (riscv_vfnmsub_vl node:$A, node:$B, + node:$C, node:$D, node:$E), [{ + return N->hasOneUse(); +}]>; + foreach kind = ["ADD", "UMAX", "SMAX", "UMIN", "SMIN", "AND", "OR", "XOR", "FADD", "SEQ_FADD", "FMIN", "FMAX"] in def rvv_vecreduce_#kind#_vl : SDNode<"RISCVISD::VECREDUCE_"#kind#"_VL", SDTRVVVecReduce>; @@ -1057,6 +1085,54 @@ } } +multiclass VPatFPMulAccVL_VV_VF { + foreach vti = AllFloatVectors in { + defvar suffix = vti.LMul.MX; + def : Pat<(riscv_vp_merge_vl (vti.Mask true_mask), + (vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rs2, + vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)), + vti.RegClass:$rd, VLOpFrag), + (!cast(instruction_name#"_VV_"# suffix) + vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, + GPR:$vl, vti.Log2SEW, TAIL_UNDISTURBED_MASK_UNDISTURBED)>; + def : Pat<(riscv_vp_merge_vl (vti.Mask V0), + (vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rs2, + vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)), + vti.RegClass:$rd, VLOpFrag), + (!cast(instruction_name#"_VV_"# suffix #"_MASK") + vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, + (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_UNDISTURBED_MASK_UNDISTURBED)>; + def : Pat<(riscv_vp_merge_vl (vti.Mask true_mask), + (vti.Vector (vop (SplatFPOp vti.ScalarRegClass:$rs1), vti.RegClass:$rs2, + vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)), + vti.RegClass:$rd, VLOpFrag), + (!cast(instruction_name#"_V" # vti.ScalarSuffix # "_" # suffix) + vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, + GPR:$vl, vti.Log2SEW, TAIL_UNDISTURBED_MASK_UNDISTURBED)>; + def : Pat<(riscv_vp_merge_vl (vti.Mask V0), + (vti.Vector (vop (SplatFPOp vti.ScalarRegClass:$rs1), vti.RegClass:$rs2, + vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)), + vti.RegClass:$rd, VLOpFrag), + (!cast(instruction_name#"_V" # vti.ScalarSuffix # "_" # suffix # "_MASK") + vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, + (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_UNDISTURBED_MASK_UNDISTURBED)>; + def : Pat<(riscv_vselect_vl (vti.Mask V0), + (vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rs2, + vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)), + vti.RegClass:$rd, VLOpFrag), + (!cast(instruction_name#"_VV_"# suffix #"_MASK") + vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, + (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + def : Pat<(riscv_vselect_vl (vti.Mask V0), + (vti.Vector (vop (SplatFPOp vti.ScalarRegClass:$rs1), vti.RegClass:$rs2, + vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)), + vti.RegClass:$rd, VLOpFrag), + (!cast(instruction_name#"_V" # vti.ScalarSuffix # "_" # suffix # "_MASK") + vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, + (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + } +} + multiclass VPatWidenFPMulAccVL_VV_VF { foreach vtiToWti = AllWidenableFloatVectors in { defvar vti = vtiToWti.Vti; @@ -1393,6 +1469,10 @@ defm : VPatFPMulAddVL_VV_VF; defm : VPatFPMulAddVL_VV_VF; defm : VPatFPMulAddVL_VV_VF; +defm : VPatFPMulAccVL_VV_VF; +defm : VPatFPMulAccVL_VV_VF; +defm : VPatFPMulAccVL_VV_VF; +defm : VPatFPMulAccVL_VV_VF; // 14.7. Vector Widening Floating-Point Fused Multiply-Add Instructions defm : VPatWidenFPMulAccVL_VV_VF; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmacc-vp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmacc-vp.ll @@ -0,0 +1,1529 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s + +declare <2 x half> @llvm.vp.fma.v2f16(<2 x half>, <2 x half>, <2 x half>, <2 x i1>, i32) +declare <2 x half> @llvm.vp.fneg.v2f16(<2 x half>, <2 x i1>, i32) +declare <2 x half> @llvm.vp.merge.v2f16(<2 x i1>, <2 x half>, <2 x half>, i32) +declare <2 x half> @llvm.vp.select.v2f16(<2 x i1>, <2 x half>, <2 x half>, i32) + +define <2 x half> @vfmacc_vv_v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_v2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl) + ret <2 x half> %u +} + +define <2 x half> @vfmacc_vv_v2f16_unmasked(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_v2f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %allones, <2 x half> %v, <2 x half> %c, i32 %evl) + ret <2 x half> %u +} + +define <2 x half> @vfmacc_vf_v2f16(<2 x half> %va, half %b, <2 x half> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x half> poison, half %b, i32 0 + %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %va, <2 x half> %vb, <2 x half> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl) + ret <2 x half> %u +} + +define <2 x half> @vfmacc_vf_v2f16_commute(<2 x half> %va, half %b, <2 x half> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v2f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x half> poison, half %b, i32 0 + %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %vb, <2 x half> %va, <2 x half> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl) + ret <2 x half> %u +} + +define <2 x half> @vfmacc_vf_v2f16_unmasked(<2 x half> %va, half %b, <2 x half> %c, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v2f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x half> poison, half %b, i32 0 + %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %va, <2 x half> %vb, <2 x half> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %allones, <2 x half> %v, <2 x half> %c, i32 %evl) + ret <2 x half> %u +} + +define <2 x half> @vfmacc_vv_v2f16_ta(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_v2f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x half> @llvm.vp.select.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl) + ret <2 x half> %u +} + +define <2 x half> @vfmacc_vf_v2f16_ta(<2 x half> %va, half %b, <2 x half> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v2f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x half> poison, half %b, i32 0 + %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %va, <2 x half> %vb, <2 x half> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x half> @llvm.vp.select.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl) + ret <2 x half> %u +} + +define <2 x half> @vfmacc_vf_v2f16_commute_ta(<2 x half> %va, half %b, <2 x half> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v2f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x half> poison, half %b, i32 0 + %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %vb, <2 x half> %va, <2 x half> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x half> @llvm.vp.select.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl) + ret <2 x half> %u +} + +declare <4 x half> @llvm.vp.fma.v4f16(<4 x half>, <4 x half>, <4 x half>, <4 x i1>, i32) +declare <4 x half> @llvm.vp.fneg.v4f16(<4 x half>, <4 x i1>, i32) +declare <4 x half> @llvm.vp.merge.v4f16(<4 x i1>, <4 x half>, <4 x half>, i32) +declare <4 x half> @llvm.vp.select.v4f16(<4 x i1>, <4 x half>, <4 x half>, i32) + +define <4 x half> @vfmacc_vv_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_v4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl) + ret <4 x half> %u +} + +define <4 x half> @vfmacc_vv_v4f16_unmasked(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_v4f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %allones, <4 x half> %v, <4 x half> %c, i32 %evl) + ret <4 x half> %u +} + +define <4 x half> @vfmacc_vf_v4f16(<4 x half> %va, half %b, <4 x half> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x half> poison, half %b, i32 0 + %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %va, <4 x half> %vb, <4 x half> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl) + ret <4 x half> %u +} + +define <4 x half> @vfmacc_vf_v4f16_commute(<4 x half> %va, half %b, <4 x half> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v4f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x half> poison, half %b, i32 0 + %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %vb, <4 x half> %va, <4 x half> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl) + ret <4 x half> %u +} + +define <4 x half> @vfmacc_vf_v4f16_unmasked(<4 x half> %va, half %b, <4 x half> %c, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v4f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x half> poison, half %b, i32 0 + %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %va, <4 x half> %vb, <4 x half> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %allones, <4 x half> %v, <4 x half> %c, i32 %evl) + ret <4 x half> %u +} + +define <4 x half> @vfmacc_vv_v4f16_ta(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_v4f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x half> @llvm.vp.select.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl) + ret <4 x half> %u +} + +define <4 x half> @vfmacc_vf_v4f16_ta(<4 x half> %va, half %b, <4 x half> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v4f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x half> poison, half %b, i32 0 + %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %va, <4 x half> %vb, <4 x half> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x half> @llvm.vp.select.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl) + ret <4 x half> %u +} + +define <4 x half> @vfmacc_vf_v4f16_commute_ta(<4 x half> %va, half %b, <4 x half> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v4f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x half> poison, half %b, i32 0 + %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %vb, <4 x half> %va, <4 x half> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x half> @llvm.vp.select.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl) + ret <4 x half> %u +} + +declare <8 x half> @llvm.vp.fma.v8f16(<8 x half>, <8 x half>, <8 x half>, <8 x i1>, i32) +declare <8 x half> @llvm.vp.fneg.v8f16(<8 x half>, <8 x i1>, i32) +declare <8 x half> @llvm.vp.merge.v8f16(<8 x i1>, <8 x half>, <8 x half>, i32) +declare <8 x half> @llvm.vp.select.v8f16(<8 x i1>, <8 x half>, <8 x half>, i32) + +define <8 x half> @vfmacc_vv_v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl) + ret <8 x half> %u +} + +define <8 x half> @vfmacc_vv_v8f16_unmasked(<8 x half> %a, <8 x half> %b, <8 x half> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_v8f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %allones, <8 x half> %v, <8 x half> %c, i32 %evl) + ret <8 x half> %u +} + +define <8 x half> @vfmacc_vf_v8f16(<8 x half> %va, half %b, <8 x half> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x half> poison, half %b, i32 0 + %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %va, <8 x half> %vb, <8 x half> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl) + ret <8 x half> %u +} + +define <8 x half> @vfmacc_vf_v8f16_commute(<8 x half> %va, half %b, <8 x half> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v8f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x half> poison, half %b, i32 0 + %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %vb, <8 x half> %va, <8 x half> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl) + ret <8 x half> %u +} + +define <8 x half> @vfmacc_vf_v8f16_unmasked(<8 x half> %va, half %b, <8 x half> %c, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v8f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x half> poison, half %b, i32 0 + %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %va, <8 x half> %vb, <8 x half> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %allones, <8 x half> %v, <8 x half> %c, i32 %evl) + ret <8 x half> %u +} + +define <8 x half> @vfmacc_vv_v8f16_ta(<8 x half> %a, <8 x half> %b, <8 x half> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_v8f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x half> @llvm.vp.select.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl) + ret <8 x half> %u +} + +define <8 x half> @vfmacc_vf_v8f16_ta(<8 x half> %va, half %b, <8 x half> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v8f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x half> poison, half %b, i32 0 + %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %va, <8 x half> %vb, <8 x half> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x half> @llvm.vp.select.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl) + ret <8 x half> %u +} + +define <8 x half> @vfmacc_vf_v8f16_commute_ta(<8 x half> %va, half %b, <8 x half> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v8f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x half> poison, half %b, i32 0 + %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %vb, <8 x half> %va, <8 x half> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x half> @llvm.vp.select.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl) + ret <8 x half> %u +} + +declare <16 x half> @llvm.vp.fma.v16f16(<16 x half>, <16 x half>, <16 x half>, <16 x i1>, i32) +declare <16 x half> @llvm.vp.fneg.v16f16(<16 x half>, <16 x i1>, i32) +declare <16 x half> @llvm.vp.merge.v16f16(<16 x i1>, <16 x half>, <16 x half>, i32) +declare <16 x half> @llvm.vp.select.v16f16(<16 x i1>, <16 x half>, <16 x half>, i32) + +define <16 x half> @vfmacc_vv_v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_v16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfmacc.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c, <16 x i1> %allones, i32 %evl) + %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl) + ret <16 x half> %u +} + +define <16 x half> @vfmacc_vv_v16f16_unmasked(<16 x half> %a, <16 x half> %b, <16 x half> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_v16f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfmacc.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c, <16 x i1> %allones, i32 %evl) + %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %allones, <16 x half> %v, <16 x half> %c, i32 %evl) + ret <16 x half> %u +} + +define <16 x half> @vfmacc_vf_v16f16(<16 x half> %va, half %b, <16 x half> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x half> poison, half %b, i32 0 + %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %va, <16 x half> %vb, <16 x half> %c, <16 x i1> %allones, i32 %evl) + %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl) + ret <16 x half> %u +} + +define <16 x half> @vfmacc_vf_v16f16_commute(<16 x half> %va, half %b, <16 x half> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v16f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x half> poison, half %b, i32 0 + %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %vb, <16 x half> %va, <16 x half> %c, <16 x i1> %allones, i32 %evl) + %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl) + ret <16 x half> %u +} + +define <16 x half> @vfmacc_vf_v16f16_unmasked(<16 x half> %va, half %b, <16 x half> %c, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v16f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfmacc.vf v10, fa0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x half> poison, half %b, i32 0 + %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %va, <16 x half> %vb, <16 x half> %c, <16 x i1> %allones, i32 %evl) + %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %allones, <16 x half> %v, <16 x half> %c, i32 %evl) + ret <16 x half> %u +} + +define <16 x half> @vfmacc_vv_v16f16_ta(<16 x half> %a, <16 x half> %b, <16 x half> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_v16f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfmacc.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c, <16 x i1> %allones, i32 %evl) + %u = call <16 x half> @llvm.vp.select.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl) + ret <16 x half> %u +} + +define <16 x half> @vfmacc_vf_v16f16_ta(<16 x half> %va, half %b, <16 x half> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v16f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x half> poison, half %b, i32 0 + %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %va, <16 x half> %vb, <16 x half> %c, <16 x i1> %allones, i32 %evl) + %u = call <16 x half> @llvm.vp.select.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl) + ret <16 x half> %u +} + +define <16 x half> @vfmacc_vf_v16f16_commute_ta(<16 x half> %va, half %b, <16 x half> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v16f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x half> poison, half %b, i32 0 + %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %vb, <16 x half> %va, <16 x half> %c, <16 x i1> %allones, i32 %evl) + %u = call <16 x half> @llvm.vp.select.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl) + ret <16 x half> %u +} + +declare <32 x half> @llvm.vp.fma.v32f16(<32 x half>, <32 x half>, <32 x half>, <32 x i1>, i32) +declare <32 x half> @llvm.vp.fneg.v32f16(<32 x half>, <32 x i1>, i32) +declare <32 x half> @llvm.vp.merge.v32f16(<32 x i1>, <32 x half>, <32 x half>, i32) +declare <32 x half> @llvm.vp.select.v32f16(<32 x i1>, <32 x half>, <32 x half>, i32) + +define <32 x half> @vfmacc_vv_v32f16(<32 x half> %a, <32 x half> %b, <32 x half> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_v32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfmacc.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %a, <32 x half> %b, <32 x half> %c, <32 x i1> %allones, i32 %evl) + %u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl) + ret <32 x half> %u +} + +define <32 x half> @vfmacc_vv_v32f16_unmasked(<32 x half> %a, <32 x half> %b, <32 x half> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_v32f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfmacc.vv v16, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %a, <32 x half> %b, <32 x half> %c, <32 x i1> %allones, i32 %evl) + %u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> %allones, <32 x half> %v, <32 x half> %c, i32 %evl) + ret <32 x half> %u +} + +define <32 x half> @vfmacc_vf_v32f16(<32 x half> %va, half %b, <32 x half> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <32 x half> poison, half %b, i32 0 + %vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %va, <32 x half> %vb, <32 x half> %c, <32 x i1> %allones, i32 %evl) + %u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl) + ret <32 x half> %u +} + +define <32 x half> @vfmacc_vf_v32f16_commute(<32 x half> %va, half %b, <32 x half> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v32f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <32 x half> poison, half %b, i32 0 + %vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %vb, <32 x half> %va, <32 x half> %c, <32 x i1> %allones, i32 %evl) + %u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl) + ret <32 x half> %u +} + +define <32 x half> @vfmacc_vf_v32f16_unmasked(<32 x half> %va, half %b, <32 x half> %c, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v32f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfmacc.vf v12, fa0, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <32 x half> poison, half %b, i32 0 + %vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %va, <32 x half> %vb, <32 x half> %c, <32 x i1> %allones, i32 %evl) + %u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> %allones, <32 x half> %v, <32 x half> %c, i32 %evl) + ret <32 x half> %u +} + +define <32 x half> @vfmacc_vv_v32f16_ta(<32 x half> %a, <32 x half> %b, <32 x half> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_v32f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfmacc.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %a, <32 x half> %b, <32 x half> %c, <32 x i1> %allones, i32 %evl) + %u = call <32 x half> @llvm.vp.select.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl) + ret <32 x half> %u +} + +define <32 x half> @vfmacc_vf_v32f16_ta(<32 x half> %va, half %b, <32 x half> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v32f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <32 x half> poison, half %b, i32 0 + %vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %va, <32 x half> %vb, <32 x half> %c, <32 x i1> %allones, i32 %evl) + %u = call <32 x half> @llvm.vp.select.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl) + ret <32 x half> %u +} + +define <32 x half> @vfmacc_vf_v32f16_commute_ta(<32 x half> %va, half %b, <32 x half> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v32f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <32 x half> poison, half %b, i32 0 + %vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %vb, <32 x half> %va, <32 x half> %c, <32 x i1> %allones, i32 %evl) + %u = call <32 x half> @llvm.vp.select.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl) + ret <32 x half> %u +} + +declare <2 x float> @llvm.vp.fma.v2f32(<2 x float>, <2 x float>, <2 x float>, <2 x i1>, i32) +declare <2 x float> @llvm.vp.fneg.v2f32(<2 x float>, <2 x i1>, i32) +declare <2 x float> @llvm.vp.merge.v2f32(<2 x i1>, <2 x float>, <2 x float>, i32) +declare <2 x float> @llvm.vp.select.v2f32(<2 x i1>, <2 x float>, <2 x float>, i32) + +define <2 x float> @vfmacc_vv_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_v2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl) + ret <2 x float> %u +} + +define <2 x float> @vfmacc_vv_v2f32_unmasked(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_v2f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %allones, <2 x float> %v, <2 x float> %c, i32 %evl) + ret <2 x float> %u +} + +define <2 x float> @vfmacc_vf_v2f32(<2 x float> %va, float %b, <2 x float> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x float> poison, float %b, i32 0 + %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %va, <2 x float> %vb, <2 x float> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl) + ret <2 x float> %u +} + +define <2 x float> @vfmacc_vf_v2f32_commute(<2 x float> %va, float %b, <2 x float> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v2f32_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x float> poison, float %b, i32 0 + %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %vb, <2 x float> %va, <2 x float> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl) + ret <2 x float> %u +} + +define <2 x float> @vfmacc_vf_v2f32_unmasked(<2 x float> %va, float %b, <2 x float> %c, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v2f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x float> poison, float %b, i32 0 + %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %va, <2 x float> %vb, <2 x float> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %allones, <2 x float> %v, <2 x float> %c, i32 %evl) + ret <2 x float> %u +} + +define <2 x float> @vfmacc_vv_v2f32_ta(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_v2f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x float> @llvm.vp.select.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl) + ret <2 x float> %u +} + +define <2 x float> @vfmacc_vf_v2f32_ta(<2 x float> %va, float %b, <2 x float> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v2f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x float> poison, float %b, i32 0 + %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %va, <2 x float> %vb, <2 x float> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x float> @llvm.vp.select.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl) + ret <2 x float> %u +} + +define <2 x float> @vfmacc_vf_v2f32_commute_ta(<2 x float> %va, float %b, <2 x float> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v2f32_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x float> poison, float %b, i32 0 + %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %vb, <2 x float> %va, <2 x float> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x float> @llvm.vp.select.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl) + ret <2 x float> %u +} + +declare <4 x float> @llvm.vp.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, <4 x i1>, i32) +declare <4 x float> @llvm.vp.fneg.v4f32(<4 x float>, <4 x i1>, i32) +declare <4 x float> @llvm.vp.merge.v4f32(<4 x i1>, <4 x float>, <4 x float>, i32) +declare <4 x float> @llvm.vp.select.v4f32(<4 x i1>, <4 x float>, <4 x float>, i32) + +define <4 x float> @vfmacc_vv_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl) + ret <4 x float> %u +} + +define <4 x float> @vfmacc_vv_v4f32_unmasked(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_v4f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %allones, <4 x float> %v, <4 x float> %c, i32 %evl) + ret <4 x float> %u +} + +define <4 x float> @vfmacc_vf_v4f32(<4 x float> %va, float %b, <4 x float> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x float> poison, float %b, i32 0 + %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %va, <4 x float> %vb, <4 x float> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl) + ret <4 x float> %u +} + +define <4 x float> @vfmacc_vf_v4f32_commute(<4 x float> %va, float %b, <4 x float> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v4f32_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x float> poison, float %b, i32 0 + %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %vb, <4 x float> %va, <4 x float> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl) + ret <4 x float> %u +} + +define <4 x float> @vfmacc_vf_v4f32_unmasked(<4 x float> %va, float %b, <4 x float> %c, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v4f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x float> poison, float %b, i32 0 + %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %va, <4 x float> %vb, <4 x float> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %allones, <4 x float> %v, <4 x float> %c, i32 %evl) + ret <4 x float> %u +} + +define <4 x float> @vfmacc_vv_v4f32_ta(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_v4f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x float> @llvm.vp.select.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl) + ret <4 x float> %u +} + +define <4 x float> @vfmacc_vf_v4f32_ta(<4 x float> %va, float %b, <4 x float> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v4f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x float> poison, float %b, i32 0 + %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %va, <4 x float> %vb, <4 x float> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x float> @llvm.vp.select.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl) + ret <4 x float> %u +} + +define <4 x float> @vfmacc_vf_v4f32_commute_ta(<4 x float> %va, float %b, <4 x float> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v4f32_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x float> poison, float %b, i32 0 + %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %vb, <4 x float> %va, <4 x float> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x float> @llvm.vp.select.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl) + ret <4 x float> %u +} + +declare <8 x float> @llvm.vp.fma.v8f32(<8 x float>, <8 x float>, <8 x float>, <8 x i1>, i32) +declare <8 x float> @llvm.vp.fneg.v8f32(<8 x float>, <8 x i1>, i32) +declare <8 x float> @llvm.vp.merge.v8f32(<8 x i1>, <8 x float>, <8 x float>, i32) +declare <8 x float> @llvm.vp.select.v8f32(<8 x i1>, <8 x float>, <8 x float>, i32) + +define <8 x float> @vfmacc_vv_v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfmacc.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl) + ret <8 x float> %u +} + +define <8 x float> @vfmacc_vv_v8f32_unmasked(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_v8f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfmacc.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %allones, <8 x float> %v, <8 x float> %c, i32 %evl) + ret <8 x float> %u +} + +define <8 x float> @vfmacc_vf_v8f32(<8 x float> %va, float %b, <8 x float> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x float> poison, float %b, i32 0 + %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %va, <8 x float> %vb, <8 x float> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl) + ret <8 x float> %u +} + +define <8 x float> @vfmacc_vf_v8f32_commute(<8 x float> %va, float %b, <8 x float> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v8f32_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x float> poison, float %b, i32 0 + %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %vb, <8 x float> %va, <8 x float> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl) + ret <8 x float> %u +} + +define <8 x float> @vfmacc_vf_v8f32_unmasked(<8 x float> %va, float %b, <8 x float> %c, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v8f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfmacc.vf v10, fa0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x float> poison, float %b, i32 0 + %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %va, <8 x float> %vb, <8 x float> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %allones, <8 x float> %v, <8 x float> %c, i32 %evl) + ret <8 x float> %u +} + +define <8 x float> @vfmacc_vv_v8f32_ta(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_v8f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfmacc.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x float> @llvm.vp.select.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl) + ret <8 x float> %u +} + +define <8 x float> @vfmacc_vf_v8f32_ta(<8 x float> %va, float %b, <8 x float> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v8f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x float> poison, float %b, i32 0 + %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %va, <8 x float> %vb, <8 x float> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x float> @llvm.vp.select.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl) + ret <8 x float> %u +} + +define <8 x float> @vfmacc_vf_v8f32_commute_ta(<8 x float> %va, float %b, <8 x float> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v8f32_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x float> poison, float %b, i32 0 + %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %vb, <8 x float> %va, <8 x float> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x float> @llvm.vp.select.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl) + ret <8 x float> %u +} + +declare <16 x float> @llvm.vp.fma.v16f32(<16 x float>, <16 x float>, <16 x float>, <16 x i1>, i32) +declare <16 x float> @llvm.vp.fneg.v16f32(<16 x float>, <16 x i1>, i32) +declare <16 x float> @llvm.vp.merge.v16f32(<16 x i1>, <16 x float>, <16 x float>, i32) +declare <16 x float> @llvm.vp.select.v16f32(<16 x i1>, <16 x float>, <16 x float>, i32) + +define <16 x float> @vfmacc_vv_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_v16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfmacc.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x i1> %allones, i32 %evl) + %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl) + ret <16 x float> %u +} + +define <16 x float> @vfmacc_vv_v16f32_unmasked(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_v16f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfmacc.vv v16, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x i1> %allones, i32 %evl) + %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %allones, <16 x float> %v, <16 x float> %c, i32 %evl) + ret <16 x float> %u +} + +define <16 x float> @vfmacc_vf_v16f32(<16 x float> %va, float %b, <16 x float> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x float> poison, float %b, i32 0 + %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %c, <16 x i1> %allones, i32 %evl) + %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl) + ret <16 x float> %u +} + +define <16 x float> @vfmacc_vf_v16f32_commute(<16 x float> %va, float %b, <16 x float> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v16f32_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x float> poison, float %b, i32 0 + %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %vb, <16 x float> %va, <16 x float> %c, <16 x i1> %allones, i32 %evl) + %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl) + ret <16 x float> %u +} + +define <16 x float> @vfmacc_vf_v16f32_unmasked(<16 x float> %va, float %b, <16 x float> %c, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v16f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfmacc.vf v12, fa0, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x float> poison, float %b, i32 0 + %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %c, <16 x i1> %allones, i32 %evl) + %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %allones, <16 x float> %v, <16 x float> %c, i32 %evl) + ret <16 x float> %u +} + +define <16 x float> @vfmacc_vv_v16f32_ta(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_v16f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfmacc.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x i1> %allones, i32 %evl) + %u = call <16 x float> @llvm.vp.select.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl) + ret <16 x float> %u +} + +define <16 x float> @vfmacc_vf_v16f32_ta(<16 x float> %va, float %b, <16 x float> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v16f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x float> poison, float %b, i32 0 + %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %c, <16 x i1> %allones, i32 %evl) + %u = call <16 x float> @llvm.vp.select.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl) + ret <16 x float> %u +} + +define <16 x float> @vfmacc_vf_v16f32_commute_ta(<16 x float> %va, float %b, <16 x float> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v16f32_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x float> poison, float %b, i32 0 + %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %vb, <16 x float> %va, <16 x float> %c, <16 x i1> %allones, i32 %evl) + %u = call <16 x float> @llvm.vp.select.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl) + ret <16 x float> %u +} + +declare <2 x double> @llvm.vp.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, <2 x i1>, i32) +declare <2 x double> @llvm.vp.fneg.v2f64(<2 x double>, <2 x i1>, i32) +declare <2 x double> @llvm.vp.merge.v2f64(<2 x i1>, <2 x double>, <2 x double>, i32) +declare <2 x double> @llvm.vp.select.v2f64(<2 x i1>, <2 x double>, <2 x double>, i32) + +define <2 x double> @vfmacc_vv_v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl) + ret <2 x double> %u +} + +define <2 x double> @vfmacc_vv_v2f64_unmasked(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_v2f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %allones, <2 x double> %v, <2 x double> %c, i32 %evl) + ret <2 x double> %u +} + +define <2 x double> @vfmacc_vf_v2f64(<2 x double> %va, double %b, <2 x double> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x double> poison, double %b, i32 0 + %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %va, <2 x double> %vb, <2 x double> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl) + ret <2 x double> %u +} + +define <2 x double> @vfmacc_vf_v2f64_commute(<2 x double> %va, double %b, <2 x double> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v2f64_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x double> poison, double %b, i32 0 + %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %vb, <2 x double> %va, <2 x double> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl) + ret <2 x double> %u +} + +define <2 x double> @vfmacc_vf_v2f64_unmasked(<2 x double> %va, double %b, <2 x double> %c, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v2f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x double> poison, double %b, i32 0 + %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %va, <2 x double> %vb, <2 x double> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %allones, <2 x double> %v, <2 x double> %c, i32 %evl) + ret <2 x double> %u +} + +define <2 x double> @vfmacc_vv_v2f64_ta(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_v2f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x double> @llvm.vp.select.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl) + ret <2 x double> %u +} + +define <2 x double> @vfmacc_vf_v2f64_ta(<2 x double> %va, double %b, <2 x double> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v2f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x double> poison, double %b, i32 0 + %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %va, <2 x double> %vb, <2 x double> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x double> @llvm.vp.select.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl) + ret <2 x double> %u +} + +define <2 x double> @vfmacc_vf_v2f64_commute_ta(<2 x double> %va, double %b, <2 x double> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v2f64_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x double> poison, double %b, i32 0 + %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %vb, <2 x double> %va, <2 x double> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x double> @llvm.vp.select.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl) + ret <2 x double> %u +} + +declare <4 x double> @llvm.vp.fma.v4f64(<4 x double>, <4 x double>, <4 x double>, <4 x i1>, i32) +declare <4 x double> @llvm.vp.fneg.v4f64(<4 x double>, <4 x i1>, i32) +declare <4 x double> @llvm.vp.merge.v4f64(<4 x i1>, <4 x double>, <4 x double>, i32) +declare <4 x double> @llvm.vp.select.v4f64(<4 x i1>, <4 x double>, <4 x double>, i32) + +define <4 x double> @vfmacc_vv_v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfmacc.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl) + ret <4 x double> %u +} + +define <4 x double> @vfmacc_vv_v4f64_unmasked(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_v4f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfmacc.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %allones, <4 x double> %v, <4 x double> %c, i32 %evl) + ret <4 x double> %u +} + +define <4 x double> @vfmacc_vf_v4f64(<4 x double> %va, double %b, <4 x double> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x double> poison, double %b, i32 0 + %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %va, <4 x double> %vb, <4 x double> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl) + ret <4 x double> %u +} + +define <4 x double> @vfmacc_vf_v4f64_commute(<4 x double> %va, double %b, <4 x double> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v4f64_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x double> poison, double %b, i32 0 + %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %vb, <4 x double> %va, <4 x double> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl) + ret <4 x double> %u +} + +define <4 x double> @vfmacc_vf_v4f64_unmasked(<4 x double> %va, double %b, <4 x double> %c, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v4f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfmacc.vf v10, fa0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x double> poison, double %b, i32 0 + %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %va, <4 x double> %vb, <4 x double> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %allones, <4 x double> %v, <4 x double> %c, i32 %evl) + ret <4 x double> %u +} + +define <4 x double> @vfmacc_vv_v4f64_ta(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_v4f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfmacc.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x double> @llvm.vp.select.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl) + ret <4 x double> %u +} + +define <4 x double> @vfmacc_vf_v4f64_ta(<4 x double> %va, double %b, <4 x double> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v4f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x double> poison, double %b, i32 0 + %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %va, <4 x double> %vb, <4 x double> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x double> @llvm.vp.select.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl) + ret <4 x double> %u +} + +define <4 x double> @vfmacc_vf_v4f64_commute_ta(<4 x double> %va, double %b, <4 x double> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v4f64_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x double> poison, double %b, i32 0 + %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %vb, <4 x double> %va, <4 x double> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x double> @llvm.vp.select.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl) + ret <4 x double> %u +} + +declare <8 x double> @llvm.vp.fma.v8f64(<8 x double>, <8 x double>, <8 x double>, <8 x i1>, i32) +declare <8 x double> @llvm.vp.fneg.v8f64(<8 x double>, <8 x i1>, i32) +declare <8 x double> @llvm.vp.merge.v8f64(<8 x i1>, <8 x double>, <8 x double>, i32) +declare <8 x double> @llvm.vp.select.v8f64(<8 x i1>, <8 x double>, <8 x double>, i32) + +define <8 x double> @vfmacc_vv_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfmacc.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl) + ret <8 x double> %u +} + +define <8 x double> @vfmacc_vv_v8f64_unmasked(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_v8f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfmacc.vv v16, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %allones, <8 x double> %v, <8 x double> %c, i32 %evl) + ret <8 x double> %u +} + +define <8 x double> @vfmacc_vf_v8f64(<8 x double> %va, double %b, <8 x double> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x double> poison, double %b, i32 0 + %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %va, <8 x double> %vb, <8 x double> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl) + ret <8 x double> %u +} + +define <8 x double> @vfmacc_vf_v8f64_commute(<8 x double> %va, double %b, <8 x double> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v8f64_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x double> poison, double %b, i32 0 + %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %vb, <8 x double> %va, <8 x double> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl) + ret <8 x double> %u +} + +define <8 x double> @vfmacc_vf_v8f64_unmasked(<8 x double> %va, double %b, <8 x double> %c, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v8f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfmacc.vf v12, fa0, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x double> poison, double %b, i32 0 + %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %va, <8 x double> %vb, <8 x double> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %allones, <8 x double> %v, <8 x double> %c, i32 %evl) + ret <8 x double> %u +} + +define <8 x double> @vfmacc_vv_v8f64_ta(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_v8f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfmacc.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x double> @llvm.vp.select.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl) + ret <8 x double> %u +} + +define <8 x double> @vfmacc_vf_v8f64_ta(<8 x double> %va, double %b, <8 x double> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v8f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x double> poison, double %b, i32 0 + %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %va, <8 x double> %vb, <8 x double> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x double> @llvm.vp.select.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl) + ret <8 x double> %u +} + +define <8 x double> @vfmacc_vf_v8f64_commute_ta(<8 x double> %va, double %b, <8 x double> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_v8f64_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x double> poison, double %b, i32 0 + %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %vb, <8 x double> %va, <8 x double> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x double> @llvm.vp.select.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl) + ret <8 x double> %u +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmsac-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmsac-vp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmsac-vp.ll @@ -0,0 +1,1625 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s + +declare <2 x half> @llvm.vp.fma.v2f16(<2 x half>, <2 x half>, <2 x half>, <2 x i1>, i32) +declare <2 x half> @llvm.vp.fneg.v2f16(<2 x half>, <2 x i1>, i32) +declare <2 x half> @llvm.vp.merge.v2f16(<2 x i1>, <2 x half>, <2 x half>, i32) +declare <2 x half> @llvm.vp.select.v2f16(<2 x i1>, <2 x half>, <2 x half>, i32) + +define <2 x half> @vfmsac_vv_v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vv_v2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl) + ret <2 x half> %u +} + +define <2 x half> @vfmsac_vv_v2f16_unmasked(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vv_v2f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfmsac.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %allones, <2 x half> %v, <2 x half> %c, i32 %evl) + ret <2 x half> %u +} + +define <2 x half> @vfmsac_vf_v2f16(<2 x half> %a, half %b, <2 x half> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x half> poison, half %b, i32 0 + %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %a, <2 x half> %vb, <2 x half> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl) + ret <2 x half> %u +} + +define <2 x half> @vfmsac_vf_v2f16_commute(<2 x half> %a, half %b, <2 x half> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v2f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x half> poison, half %b, i32 0 + %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %vb, <2 x half> %a, <2 x half> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl) + ret <2 x half> %u +} + +define <2 x half> @vfmsac_vf_v2f16_unmasked(<2 x half> %a, half %b, <2 x half> %c, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v2f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x half> poison, half %b, i32 0 + %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %a, <2 x half> %vb, <2 x half> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %allones, <2 x half> %v, <2 x half> %c, i32 %evl) + ret <2 x half> %u +} + +define <2 x half> @vfmsac_vv_v2f16_ta(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vv_v2f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x half> @llvm.vp.select.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl) + ret <2 x half> %u +} + +define <2 x half> @vfmsac_vf_v2f16_ta(<2 x half> %a, half %b, <2 x half> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v2f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x half> poison, half %b, i32 0 + %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %a, <2 x half> %vb, <2 x half> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x half> @llvm.vp.select.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl) + ret <2 x half> %u +} + +define <2 x half> @vfmsac_vf_v2f16_commute_ta(<2 x half> %a, half %b, <2 x half> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v2f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x half> poison, half %b, i32 0 + %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %vb, <2 x half> %a, <2 x half> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x half> @llvm.vp.select.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl) + ret <2 x half> %u +} + +declare <4 x half> @llvm.vp.fma.v4f16(<4 x half>, <4 x half>, <4 x half>, <4 x i1>, i32) +declare <4 x half> @llvm.vp.fneg.v4f16(<4 x half>, <4 x i1>, i32) +declare <4 x half> @llvm.vp.merge.v4f16(<4 x i1>, <4 x half>, <4 x half>, i32) +declare <4 x half> @llvm.vp.select.v4f16(<4 x i1>, <4 x half>, <4 x half>, i32) + +define <4 x half> @vfmsac_vv_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vv_v4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl) + ret <4 x half> %u +} + +define <4 x half> @vfmsac_vv_v4f16_unmasked(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vv_v4f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfmsac.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %allones, <4 x half> %v, <4 x half> %c, i32 %evl) + ret <4 x half> %u +} + +define <4 x half> @vfmsac_vf_v4f16(<4 x half> %a, half %b, <4 x half> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x half> poison, half %b, i32 0 + %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %a, <4 x half> %vb, <4 x half> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl) + ret <4 x half> %u +} + +define <4 x half> @vfmsac_vf_v4f16_commute(<4 x half> %a, half %b, <4 x half> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v4f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x half> poison, half %b, i32 0 + %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %vb, <4 x half> %a, <4 x half> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl) + ret <4 x half> %u +} + +define <4 x half> @vfmsac_vf_v4f16_unmasked(<4 x half> %a, half %b, <4 x half> %c, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v4f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x half> poison, half %b, i32 0 + %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %a, <4 x half> %vb, <4 x half> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %allones, <4 x half> %v, <4 x half> %c, i32 %evl) + ret <4 x half> %u +} + +define <4 x half> @vfmsac_vv_v4f16_ta(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vv_v4f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x half> @llvm.vp.select.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl) + ret <4 x half> %u +} + +define <4 x half> @vfmsac_vf_v4f16_ta(<4 x half> %a, half %b, <4 x half> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v4f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x half> poison, half %b, i32 0 + %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %a, <4 x half> %vb, <4 x half> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x half> @llvm.vp.select.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl) + ret <4 x half> %u +} + +define <4 x half> @vfmsac_vf_v4f16_commute_ta(<4 x half> %a, half %b, <4 x half> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v4f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x half> poison, half %b, i32 0 + %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %vb, <4 x half> %a, <4 x half> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x half> @llvm.vp.select.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl) + ret <4 x half> %u +} + +declare <8 x half> @llvm.vp.fma.v8f16(<8 x half>, <8 x half>, <8 x half>, <8 x i1>, i32) +declare <8 x half> @llvm.vp.fneg.v8f16(<8 x half>, <8 x i1>, i32) +declare <8 x half> @llvm.vp.merge.v8f16(<8 x i1>, <8 x half>, <8 x half>, i32) +declare <8 x half> @llvm.vp.select.v8f16(<8 x i1>, <8 x half>, <8 x half>, i32) + +define <8 x half> @vfmsac_vv_v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vv_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl) + ret <8 x half> %u +} + +define <8 x half> @vfmsac_vv_v8f16_unmasked(<8 x half> %a, <8 x half> %b, <8 x half> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vv_v8f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfmsac.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %allones, <8 x half> %v, <8 x half> %c, i32 %evl) + ret <8 x half> %u +} + +define <8 x half> @vfmsac_vf_v8f16(<8 x half> %a, half %b, <8 x half> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x half> poison, half %b, i32 0 + %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %a, <8 x half> %vb, <8 x half> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl) + ret <8 x half> %u +} + +define <8 x half> @vfmsac_vf_v8f16_commute(<8 x half> %a, half %b, <8 x half> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v8f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x half> poison, half %b, i32 0 + %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %vb, <8 x half> %a, <8 x half> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl) + ret <8 x half> %u +} + +define <8 x half> @vfmsac_vf_v8f16_unmasked(<8 x half> %a, half %b, <8 x half> %c, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v8f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x half> poison, half %b, i32 0 + %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %a, <8 x half> %vb, <8 x half> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %allones, <8 x half> %v, <8 x half> %c, i32 %evl) + ret <8 x half> %u +} + +define <8 x half> @vfmsac_vv_v8f16_ta(<8 x half> %a, <8 x half> %b, <8 x half> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vv_v8f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x half> @llvm.vp.select.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl) + ret <8 x half> %u +} + +define <8 x half> @vfmsac_vf_v8f16_ta(<8 x half> %a, half %b, <8 x half> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v8f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x half> poison, half %b, i32 0 + %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %a, <8 x half> %vb, <8 x half> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x half> @llvm.vp.select.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl) + ret <8 x half> %u +} + +define <8 x half> @vfmsac_vf_v8f16_commute_ta(<8 x half> %a, half %b, <8 x half> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v8f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x half> poison, half %b, i32 0 + %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %vb, <8 x half> %a, <8 x half> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x half> @llvm.vp.select.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl) + ret <8 x half> %u +} + +declare <16 x half> @llvm.vp.fma.v16f16(<16 x half>, <16 x half>, <16 x half>, <16 x i1>, i32) +declare <16 x half> @llvm.vp.fneg.v16f16(<16 x half>, <16 x i1>, i32) +declare <16 x half> @llvm.vp.merge.v16f16(<16 x i1>, <16 x half>, <16 x half>, i32) +declare <16 x half> @llvm.vp.select.v16f16(<16 x i1>, <16 x half>, <16 x half>, i32) + +define <16 x half> @vfmsac_vv_v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vv_v16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfmsac.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> %allones, i32 %evl) + %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %negc, <16 x i1> %allones, i32 %evl) + %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl) + ret <16 x half> %u +} + +define <16 x half> @vfmsac_vv_v16f16_unmasked(<16 x half> %a, <16 x half> %b, <16 x half> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vv_v16f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfmsac.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> %allones, i32 %evl) + %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %negc, <16 x i1> %allones, i32 %evl) + %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %allones, <16 x half> %v, <16 x half> %c, i32 %evl) + ret <16 x half> %u +} + +define <16 x half> @vfmsac_vf_v16f16(<16 x half> %a, half %b, <16 x half> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x half> poison, half %b, i32 0 + %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> %allones, i32 %evl) + %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %a, <16 x half> %vb, <16 x half> %negc, <16 x i1> %allones, i32 %evl) + %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl) + ret <16 x half> %u +} + +define <16 x half> @vfmsac_vf_v16f16_commute(<16 x half> %a, half %b, <16 x half> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v16f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x half> poison, half %b, i32 0 + %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> %allones, i32 %evl) + %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %vb, <16 x half> %a, <16 x half> %negc, <16 x i1> %allones, i32 %evl) + %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl) + ret <16 x half> %u +} + +define <16 x half> @vfmsac_vf_v16f16_unmasked(<16 x half> %a, half %b, <16 x half> %c, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v16f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfmsac.vf v10, fa0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x half> poison, half %b, i32 0 + %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> %allones, i32 %evl) + %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %a, <16 x half> %vb, <16 x half> %negc, <16 x i1> %allones, i32 %evl) + %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %allones, <16 x half> %v, <16 x half> %c, i32 %evl) + ret <16 x half> %u +} + +define <16 x half> @vfmsac_vv_v16f16_ta(<16 x half> %a, <16 x half> %b, <16 x half> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vv_v16f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfmsac.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> %allones, i32 %evl) + %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %negc, <16 x i1> %allones, i32 %evl) + %u = call <16 x half> @llvm.vp.select.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl) + ret <16 x half> %u +} + +define <16 x half> @vfmsac_vf_v16f16_ta(<16 x half> %a, half %b, <16 x half> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v16f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x half> poison, half %b, i32 0 + %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> %allones, i32 %evl) + %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %a, <16 x half> %vb, <16 x half> %negc, <16 x i1> %allones, i32 %evl) + %u = call <16 x half> @llvm.vp.select.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl) + ret <16 x half> %u +} + +define <16 x half> @vfmsac_vf_v16f16_commute_ta(<16 x half> %a, half %b, <16 x half> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v16f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x half> poison, half %b, i32 0 + %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> %allones, i32 %evl) + %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %vb, <16 x half> %a, <16 x half> %negc, <16 x i1> %allones, i32 %evl) + %u = call <16 x half> @llvm.vp.select.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl) + ret <16 x half> %u +} + +declare <32 x half> @llvm.vp.fma.v32f16(<32 x half>, <32 x half>, <32 x half>, <32 x i1>, i32) +declare <32 x half> @llvm.vp.fneg.v32f16(<32 x half>, <32 x i1>, i32) +declare <32 x half> @llvm.vp.merge.v32f16(<32 x i1>, <32 x half>, <32 x half>, i32) +declare <32 x half> @llvm.vp.select.v32f16(<32 x i1>, <32 x half>, <32 x half>, i32) + +define <32 x half> @vfmsac_vv_v32f16(<32 x half> %a, <32 x half> %b, <32 x half> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vv_v32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfmsac.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> %allones, i32 %evl) + %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %a, <32 x half> %b, <32 x half> %negc, <32 x i1> %allones, i32 %evl) + %u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl) + ret <32 x half> %u +} + +define <32 x half> @vfmsac_vv_v32f16_unmasked(<32 x half> %a, <32 x half> %b, <32 x half> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vv_v32f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfmsac.vv v16, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> %allones, i32 %evl) + %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %a, <32 x half> %b, <32 x half> %negc, <32 x i1> %allones, i32 %evl) + %u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> %allones, <32 x half> %v, <32 x half> %c, i32 %evl) + ret <32 x half> %u +} + +define <32 x half> @vfmsac_vf_v32f16(<32 x half> %a, half %b, <32 x half> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <32 x half> poison, half %b, i32 0 + %vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> %allones, i32 %evl) + %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %a, <32 x half> %vb, <32 x half> %negc, <32 x i1> %allones, i32 %evl) + %u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl) + ret <32 x half> %u +} + +define <32 x half> @vfmsac_vf_v32f16_commute(<32 x half> %a, half %b, <32 x half> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v32f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <32 x half> poison, half %b, i32 0 + %vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> %allones, i32 %evl) + %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %vb, <32 x half> %a, <32 x half> %negc, <32 x i1> %allones, i32 %evl) + %u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl) + ret <32 x half> %u +} + +define <32 x half> @vfmsac_vf_v32f16_unmasked(<32 x half> %a, half %b, <32 x half> %c, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v32f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfmsac.vf v12, fa0, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <32 x half> poison, half %b, i32 0 + %vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> %allones, i32 %evl) + %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %a, <32 x half> %vb, <32 x half> %negc, <32 x i1> %allones, i32 %evl) + %u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> %allones, <32 x half> %v, <32 x half> %c, i32 %evl) + ret <32 x half> %u +} + +define <32 x half> @vfmsac_vv_v32f16_ta(<32 x half> %a, <32 x half> %b, <32 x half> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vv_v32f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfmsac.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> %allones, i32 %evl) + %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %a, <32 x half> %b, <32 x half> %negc, <32 x i1> %allones, i32 %evl) + %u = call <32 x half> @llvm.vp.select.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl) + ret <32 x half> %u +} + +define <32 x half> @vfmsac_vf_v32f16_ta(<32 x half> %a, half %b, <32 x half> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v32f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <32 x half> poison, half %b, i32 0 + %vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> %allones, i32 %evl) + %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %a, <32 x half> %vb, <32 x half> %negc, <32 x i1> %allones, i32 %evl) + %u = call <32 x half> @llvm.vp.select.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl) + ret <32 x half> %u +} + +define <32 x half> @vfmsac_vf_v32f16_commute_ta(<32 x half> %a, half %b, <32 x half> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v32f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <32 x half> poison, half %b, i32 0 + %vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> %allones, i32 %evl) + %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %vb, <32 x half> %a, <32 x half> %negc, <32 x i1> %allones, i32 %evl) + %u = call <32 x half> @llvm.vp.select.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl) + ret <32 x half> %u +} + +declare <2 x float> @llvm.vp.fma.v2f32(<2 x float>, <2 x float>, <2 x float>, <2 x i1>, i32) +declare <2 x float> @llvm.vp.fneg.v2f32(<2 x float>, <2 x i1>, i32) +declare <2 x float> @llvm.vp.merge.v2f32(<2 x i1>, <2 x float>, <2 x float>, i32) +declare <2 x float> @llvm.vp.select.v2f32(<2 x i1>, <2 x float>, <2 x float>, i32) + +define <2 x float> @vfmsac_vv_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vv_v2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl) + ret <2 x float> %u +} + +define <2 x float> @vfmsac_vv_v2f32_unmasked(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vv_v2f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfmsac.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %allones, <2 x float> %v, <2 x float> %c, i32 %evl) + ret <2 x float> %u +} + +define <2 x float> @vfmsac_vf_v2f32(<2 x float> %a, float %b, <2 x float> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x float> poison, float %b, i32 0 + %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %a, <2 x float> %vb, <2 x float> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl) + ret <2 x float> %u +} + +define <2 x float> @vfmsac_vf_v2f32_commute(<2 x float> %a, float %b, <2 x float> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v2f32_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x float> poison, float %b, i32 0 + %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %vb, <2 x float> %a, <2 x float> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl) + ret <2 x float> %u +} + +define <2 x float> @vfmsac_vf_v2f32_unmasked(<2 x float> %a, float %b, <2 x float> %c, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v2f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x float> poison, float %b, i32 0 + %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %a, <2 x float> %vb, <2 x float> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %allones, <2 x float> %v, <2 x float> %c, i32 %evl) + ret <2 x float> %u +} + +define <2 x float> @vfmsac_vv_v2f32_ta(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vv_v2f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x float> @llvm.vp.select.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl) + ret <2 x float> %u +} + +define <2 x float> @vfmsac_vf_v2f32_ta(<2 x float> %a, float %b, <2 x float> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v2f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x float> poison, float %b, i32 0 + %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %a, <2 x float> %vb, <2 x float> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x float> @llvm.vp.select.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl) + ret <2 x float> %u +} + +define <2 x float> @vfmsac_vf_v2f32_commute_ta(<2 x float> %a, float %b, <2 x float> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v2f32_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x float> poison, float %b, i32 0 + %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %vb, <2 x float> %a, <2 x float> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x float> @llvm.vp.select.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl) + ret <2 x float> %u +} + +declare <4 x float> @llvm.vp.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, <4 x i1>, i32) +declare <4 x float> @llvm.vp.fneg.v4f32(<4 x float>, <4 x i1>, i32) +declare <4 x float> @llvm.vp.merge.v4f32(<4 x i1>, <4 x float>, <4 x float>, i32) +declare <4 x float> @llvm.vp.select.v4f32(<4 x i1>, <4 x float>, <4 x float>, i32) + +define <4 x float> @vfmsac_vv_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vv_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl) + ret <4 x float> %u +} + +define <4 x float> @vfmsac_vv_v4f32_unmasked(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vv_v4f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfmsac.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %allones, <4 x float> %v, <4 x float> %c, i32 %evl) + ret <4 x float> %u +} + +define <4 x float> @vfmsac_vf_v4f32(<4 x float> %a, float %b, <4 x float> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x float> poison, float %b, i32 0 + %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %a, <4 x float> %vb, <4 x float> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl) + ret <4 x float> %u +} + +define <4 x float> @vfmsac_vf_v4f32_commute(<4 x float> %a, float %b, <4 x float> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v4f32_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x float> poison, float %b, i32 0 + %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %vb, <4 x float> %a, <4 x float> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl) + ret <4 x float> %u +} + +define <4 x float> @vfmsac_vf_v4f32_unmasked(<4 x float> %a, float %b, <4 x float> %c, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v4f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x float> poison, float %b, i32 0 + %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %a, <4 x float> %vb, <4 x float> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %allones, <4 x float> %v, <4 x float> %c, i32 %evl) + ret <4 x float> %u +} + +define <4 x float> @vfmsac_vv_v4f32_ta(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vv_v4f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x float> @llvm.vp.select.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl) + ret <4 x float> %u +} + +define <4 x float> @vfmsac_vf_v4f32_ta(<4 x float> %a, float %b, <4 x float> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v4f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x float> poison, float %b, i32 0 + %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %a, <4 x float> %vb, <4 x float> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x float> @llvm.vp.select.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl) + ret <4 x float> %u +} + +define <4 x float> @vfmsac_vf_v4f32_commute_ta(<4 x float> %a, float %b, <4 x float> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v4f32_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x float> poison, float %b, i32 0 + %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %vb, <4 x float> %a, <4 x float> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x float> @llvm.vp.select.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl) + ret <4 x float> %u +} + +declare <8 x float> @llvm.vp.fma.v8f32(<8 x float>, <8 x float>, <8 x float>, <8 x i1>, i32) +declare <8 x float> @llvm.vp.fneg.v8f32(<8 x float>, <8 x i1>, i32) +declare <8 x float> @llvm.vp.merge.v8f32(<8 x i1>, <8 x float>, <8 x float>, i32) +declare <8 x float> @llvm.vp.select.v8f32(<8 x i1>, <8 x float>, <8 x float>, i32) + +define <8 x float> @vfmsac_vv_v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vv_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfmsac.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl) + ret <8 x float> %u +} + +define <8 x float> @vfmsac_vv_v8f32_unmasked(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vv_v8f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfmsac.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %allones, <8 x float> %v, <8 x float> %c, i32 %evl) + ret <8 x float> %u +} + +define <8 x float> @vfmsac_vf_v8f32(<8 x float> %a, float %b, <8 x float> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x float> poison, float %b, i32 0 + %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %a, <8 x float> %vb, <8 x float> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl) + ret <8 x float> %u +} + +define <8 x float> @vfmsac_vf_v8f32_commute(<8 x float> %a, float %b, <8 x float> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v8f32_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x float> poison, float %b, i32 0 + %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %vb, <8 x float> %a, <8 x float> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl) + ret <8 x float> %u +} + +define <8 x float> @vfmsac_vf_v8f32_unmasked(<8 x float> %a, float %b, <8 x float> %c, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v8f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfmsac.vf v10, fa0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x float> poison, float %b, i32 0 + %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %a, <8 x float> %vb, <8 x float> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %allones, <8 x float> %v, <8 x float> %c, i32 %evl) + ret <8 x float> %u +} + +define <8 x float> @vfmsac_vv_v8f32_ta(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vv_v8f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfmsac.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x float> @llvm.vp.select.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl) + ret <8 x float> %u +} + +define <8 x float> @vfmsac_vf_v8f32_ta(<8 x float> %a, float %b, <8 x float> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v8f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x float> poison, float %b, i32 0 + %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %a, <8 x float> %vb, <8 x float> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x float> @llvm.vp.select.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl) + ret <8 x float> %u +} + +define <8 x float> @vfmsac_vf_v8f32_commute_ta(<8 x float> %a, float %b, <8 x float> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v8f32_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x float> poison, float %b, i32 0 + %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %vb, <8 x float> %a, <8 x float> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x float> @llvm.vp.select.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl) + ret <8 x float> %u +} + +declare <16 x float> @llvm.vp.fma.v16f32(<16 x float>, <16 x float>, <16 x float>, <16 x i1>, i32) +declare <16 x float> @llvm.vp.fneg.v16f32(<16 x float>, <16 x i1>, i32) +declare <16 x float> @llvm.vp.merge.v16f32(<16 x i1>, <16 x float>, <16 x float>, i32) +declare <16 x float> @llvm.vp.select.v16f32(<16 x i1>, <16 x float>, <16 x float>, i32) + +define <16 x float> @vfmsac_vv_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vv_v16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfmsac.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> %allones, i32 %evl) + %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %negc, <16 x i1> %allones, i32 %evl) + %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl) + ret <16 x float> %u +} + +define <16 x float> @vfmsac_vv_v16f32_unmasked(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vv_v16f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfmsac.vv v16, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> %allones, i32 %evl) + %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %negc, <16 x i1> %allones, i32 %evl) + %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %allones, <16 x float> %v, <16 x float> %c, i32 %evl) + ret <16 x float> %u +} + +define <16 x float> @vfmsac_vf_v16f32(<16 x float> %a, float %b, <16 x float> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x float> poison, float %b, i32 0 + %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> %allones, i32 %evl) + %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %a, <16 x float> %vb, <16 x float> %negc, <16 x i1> %allones, i32 %evl) + %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl) + ret <16 x float> %u +} + +define <16 x float> @vfmsac_vf_v16f32_commute(<16 x float> %a, float %b, <16 x float> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v16f32_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x float> poison, float %b, i32 0 + %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> %allones, i32 %evl) + %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %vb, <16 x float> %a, <16 x float> %negc, <16 x i1> %allones, i32 %evl) + %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl) + ret <16 x float> %u +} + +define <16 x float> @vfmsac_vf_v16f32_unmasked(<16 x float> %a, float %b, <16 x float> %c, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v16f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfmsac.vf v12, fa0, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x float> poison, float %b, i32 0 + %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> %allones, i32 %evl) + %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %a, <16 x float> %vb, <16 x float> %negc, <16 x i1> %allones, i32 %evl) + %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %allones, <16 x float> %v, <16 x float> %c, i32 %evl) + ret <16 x float> %u +} + +define <16 x float> @vfmsac_vv_v16f32_ta(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vv_v16f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfmsac.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> %allones, i32 %evl) + %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %negc, <16 x i1> %allones, i32 %evl) + %u = call <16 x float> @llvm.vp.select.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl) + ret <16 x float> %u +} + +define <16 x float> @vfmsac_vf_v16f32_ta(<16 x float> %a, float %b, <16 x float> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v16f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x float> poison, float %b, i32 0 + %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> %allones, i32 %evl) + %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %a, <16 x float> %vb, <16 x float> %negc, <16 x i1> %allones, i32 %evl) + %u = call <16 x float> @llvm.vp.select.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl) + ret <16 x float> %u +} + +define <16 x float> @vfmsac_vf_v16f32_commute_ta(<16 x float> %a, float %b, <16 x float> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v16f32_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x float> poison, float %b, i32 0 + %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> %allones, i32 %evl) + %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %vb, <16 x float> %a, <16 x float> %negc, <16 x i1> %allones, i32 %evl) + %u = call <16 x float> @llvm.vp.select.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl) + ret <16 x float> %u +} + +declare <2 x double> @llvm.vp.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, <2 x i1>, i32) +declare <2 x double> @llvm.vp.fneg.v2f64(<2 x double>, <2 x i1>, i32) +declare <2 x double> @llvm.vp.merge.v2f64(<2 x i1>, <2 x double>, <2 x double>, i32) +declare <2 x double> @llvm.vp.select.v2f64(<2 x i1>, <2 x double>, <2 x double>, i32) + +define <2 x double> @vfmsac_vv_v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vv_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl) + ret <2 x double> %u +} + +define <2 x double> @vfmsac_vv_v2f64_unmasked(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vv_v2f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfmsac.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %allones, <2 x double> %v, <2 x double> %c, i32 %evl) + ret <2 x double> %u +} + +define <2 x double> @vfmsac_vf_v2f64(<2 x double> %a, double %b, <2 x double> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x double> poison, double %b, i32 0 + %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %a, <2 x double> %vb, <2 x double> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl) + ret <2 x double> %u +} + +define <2 x double> @vfmsac_vf_v2f64_commute(<2 x double> %a, double %b, <2 x double> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v2f64_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x double> poison, double %b, i32 0 + %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %vb, <2 x double> %a, <2 x double> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl) + ret <2 x double> %u +} + +define <2 x double> @vfmsac_vf_v2f64_unmasked(<2 x double> %a, double %b, <2 x double> %c, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v2f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x double> poison, double %b, i32 0 + %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %a, <2 x double> %vb, <2 x double> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %allones, <2 x double> %v, <2 x double> %c, i32 %evl) + ret <2 x double> %u +} + +define <2 x double> @vfmsac_vv_v2f64_ta(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vv_v2f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x double> @llvm.vp.select.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl) + ret <2 x double> %u +} + +define <2 x double> @vfmsac_vf_v2f64_ta(<2 x double> %a, double %b, <2 x double> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v2f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x double> poison, double %b, i32 0 + %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %a, <2 x double> %vb, <2 x double> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x double> @llvm.vp.select.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl) + ret <2 x double> %u +} + +define <2 x double> @vfmsac_vf_v2f64_commute_ta(<2 x double> %a, double %b, <2 x double> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v2f64_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x double> poison, double %b, i32 0 + %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %vb, <2 x double> %a, <2 x double> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x double> @llvm.vp.select.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl) + ret <2 x double> %u +} + +declare <4 x double> @llvm.vp.fma.v4f64(<4 x double>, <4 x double>, <4 x double>, <4 x i1>, i32) +declare <4 x double> @llvm.vp.fneg.v4f64(<4 x double>, <4 x i1>, i32) +declare <4 x double> @llvm.vp.merge.v4f64(<4 x i1>, <4 x double>, <4 x double>, i32) +declare <4 x double> @llvm.vp.select.v4f64(<4 x i1>, <4 x double>, <4 x double>, i32) + +define <4 x double> @vfmsac_vv_v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vv_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfmsac.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl) + ret <4 x double> %u +} + +define <4 x double> @vfmsac_vv_v4f64_unmasked(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vv_v4f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfmsac.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %allones, <4 x double> %v, <4 x double> %c, i32 %evl) + ret <4 x double> %u +} + +define <4 x double> @vfmsac_vf_v4f64(<4 x double> %a, double %b, <4 x double> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x double> poison, double %b, i32 0 + %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %a, <4 x double> %vb, <4 x double> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl) + ret <4 x double> %u +} + +define <4 x double> @vfmsac_vf_v4f64_commute(<4 x double> %a, double %b, <4 x double> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v4f64_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x double> poison, double %b, i32 0 + %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %vb, <4 x double> %a, <4 x double> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl) + ret <4 x double> %u +} + +define <4 x double> @vfmsac_vf_v4f64_unmasked(<4 x double> %a, double %b, <4 x double> %c, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v4f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfmsac.vf v10, fa0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x double> poison, double %b, i32 0 + %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %a, <4 x double> %vb, <4 x double> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %allones, <4 x double> %v, <4 x double> %c, i32 %evl) + ret <4 x double> %u +} + +define <4 x double> @vfmsac_vv_v4f64_ta(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vv_v4f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfmsac.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x double> @llvm.vp.select.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl) + ret <4 x double> %u +} + +define <4 x double> @vfmsac_vf_v4f64_ta(<4 x double> %a, double %b, <4 x double> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v4f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x double> poison, double %b, i32 0 + %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %a, <4 x double> %vb, <4 x double> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x double> @llvm.vp.select.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl) + ret <4 x double> %u +} + +define <4 x double> @vfmsac_vf_v4f64_commute_ta(<4 x double> %a, double %b, <4 x double> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v4f64_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x double> poison, double %b, i32 0 + %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %vb, <4 x double> %a, <4 x double> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x double> @llvm.vp.select.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl) + ret <4 x double> %u +} + +declare <8 x double> @llvm.vp.fma.v8f64(<8 x double>, <8 x double>, <8 x double>, <8 x i1>, i32) +declare <8 x double> @llvm.vp.fneg.v8f64(<8 x double>, <8 x i1>, i32) +declare <8 x double> @llvm.vp.merge.v8f64(<8 x i1>, <8 x double>, <8 x double>, i32) +declare <8 x double> @llvm.vp.select.v8f64(<8 x i1>, <8 x double>, <8 x double>, i32) + +define <8 x double> @vfmsac_vv_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vv_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfmsac.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl) + ret <8 x double> %u +} + +define <8 x double> @vfmsac_vv_v8f64_unmasked(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vv_v8f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfmsac.vv v16, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %allones, <8 x double> %v, <8 x double> %c, i32 %evl) + ret <8 x double> %u +} + +define <8 x double> @vfmsac_vf_v8f64(<8 x double> %a, double %b, <8 x double> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x double> poison, double %b, i32 0 + %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %a, <8 x double> %vb, <8 x double> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl) + ret <8 x double> %u +} + +define <8 x double> @vfmsac_vf_v8f64_commute(<8 x double> %a, double %b, <8 x double> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v8f64_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x double> poison, double %b, i32 0 + %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %vb, <8 x double> %a, <8 x double> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl) + ret <8 x double> %u +} + +define <8 x double> @vfmsac_vf_v8f64_unmasked(<8 x double> %a, double %b, <8 x double> %c, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v8f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfmsac.vf v12, fa0, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x double> poison, double %b, i32 0 + %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %a, <8 x double> %vb, <8 x double> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %allones, <8 x double> %v, <8 x double> %c, i32 %evl) + ret <8 x double> %u +} + +define <8 x double> @vfmsac_vv_v8f64_ta(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vv_v8f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfmsac.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x double> @llvm.vp.select.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl) + ret <8 x double> %u +} + +define <8 x double> @vfmsac_vf_v8f64_ta(<8 x double> %a, double %b, <8 x double> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v8f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x double> poison, double %b, i32 0 + %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %a, <8 x double> %vb, <8 x double> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x double> @llvm.vp.select.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl) + ret <8 x double> %u +} + +define <8 x double> @vfmsac_vf_v8f64_commute_ta(<8 x double> %a, double %b, <8 x double> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmsac_vf_v8f64_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x double> poison, double %b, i32 0 + %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %vb, <8 x double> %a, <8 x double> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x double> @llvm.vp.select.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl) + ret <8 x double> %u +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfnmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfnmacc-vp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfnmacc-vp.ll @@ -0,0 +1,1721 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s + +declare <2 x half> @llvm.vp.fma.v2f16(<2 x half>, <2 x half>, <2 x half>, <2 x i1>, i32) +declare <2 x half> @llvm.vp.fneg.v2f16(<2 x half>, <2 x i1>, i32) +declare <2 x half> @llvm.vp.merge.v2f16(<2 x i1>, <2 x half>, <2 x half>, i32) +declare <2 x half> @llvm.vp.select.v2f16(<2 x i1>, <2 x half>, <2 x half>, i32) + +define <2 x half> @vfnmacc_vv_v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_v2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> %allones, i32 %evl) + %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %nega, <2 x half> %b, <2 x half> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl) + ret <2 x half> %u +} + +define <2 x half> @vfnmacc_vv_v2f16_unmasked(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_v2f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfnmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> %allones, i32 %evl) + %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %nega, <2 x half> %b, <2 x half> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %allones, <2 x half> %v, <2 x half> %c, i32 %evl) + ret <2 x half> %u +} + +define <2 x half> @vfnmacc_vf_v2f16(<2 x half> %a, half %b, <2 x half> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x half> poison, half %b, i32 0 + %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> %allones, i32 %evl) + %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %nega, <2 x half> %vb, <2 x half> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl) + ret <2 x half> %u +} + +define <2 x half> @vfnmacc_vf_v2f16_commute(<2 x half> %a, half %b, <2 x half> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v2f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x half> poison, half %b, i32 0 + %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> %allones, i32 %evl) + %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %vb, <2 x half> %nega, <2 x half> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl) + ret <2 x half> %u +} + +define <2 x half> @vfnmacc_vf_v2f16_unmasked(<2 x half> %a, half %b, <2 x half> %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v2f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x half> poison, half %b, i32 0 + %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> %allones, i32 %evl) + %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %nega, <2 x half> %vb, <2 x half> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %allones, <2 x half> %v, <2 x half> %c, i32 %evl) + ret <2 x half> %u +} + +define <2 x half> @vfnmacc_vv_v2f16_ta(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_v2f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> %allones, i32 %evl) + %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %nega, <2 x half> %b, <2 x half> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x half> @llvm.vp.select.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl) + ret <2 x half> %u +} + +define <2 x half> @vfnmacc_vf_v2f16_ta(<2 x half> %a, half %b, <2 x half> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v2f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x half> poison, half %b, i32 0 + %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> %allones, i32 %evl) + %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %nega, <2 x half> %vb, <2 x half> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x half> @llvm.vp.select.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl) + ret <2 x half> %u +} + +define <2 x half> @vfnmacc_vf_v2f16_commute_ta(<2 x half> %a, half %b, <2 x half> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v2f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x half> poison, half %b, i32 0 + %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> %allones, i32 %evl) + %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %vb, <2 x half> %nega, <2 x half> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x half> @llvm.vp.select.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl) + ret <2 x half> %u +} + +declare <4 x half> @llvm.vp.fma.v4f16(<4 x half>, <4 x half>, <4 x half>, <4 x i1>, i32) +declare <4 x half> @llvm.vp.fneg.v4f16(<4 x half>, <4 x i1>, i32) +declare <4 x half> @llvm.vp.merge.v4f16(<4 x i1>, <4 x half>, <4 x half>, i32) +declare <4 x half> @llvm.vp.select.v4f16(<4 x i1>, <4 x half>, <4 x half>, i32) + +define <4 x half> @vfnmacc_vv_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_v4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> %allones, i32 %evl) + %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %nega, <4 x half> %b, <4 x half> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl) + ret <4 x half> %u +} + +define <4 x half> @vfnmacc_vv_v4f16_unmasked(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_v4f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfnmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> %allones, i32 %evl) + %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %nega, <4 x half> %b, <4 x half> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %allones, <4 x half> %v, <4 x half> %c, i32 %evl) + ret <4 x half> %u +} + +define <4 x half> @vfnmacc_vf_v4f16(<4 x half> %a, half %b, <4 x half> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x half> poison, half %b, i32 0 + %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> %allones, i32 %evl) + %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %nega, <4 x half> %vb, <4 x half> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl) + ret <4 x half> %u +} + +define <4 x half> @vfnmacc_vf_v4f16_commute(<4 x half> %a, half %b, <4 x half> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v4f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x half> poison, half %b, i32 0 + %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> %allones, i32 %evl) + %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %vb, <4 x half> %nega, <4 x half> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl) + ret <4 x half> %u +} + +define <4 x half> @vfnmacc_vf_v4f16_unmasked(<4 x half> %a, half %b, <4 x half> %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v4f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x half> poison, half %b, i32 0 + %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> %allones, i32 %evl) + %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %nega, <4 x half> %vb, <4 x half> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %allones, <4 x half> %v, <4 x half> %c, i32 %evl) + ret <4 x half> %u +} + +define <4 x half> @vfnmacc_vv_v4f16_ta(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_v4f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> %allones, i32 %evl) + %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %nega, <4 x half> %b, <4 x half> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x half> @llvm.vp.select.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl) + ret <4 x half> %u +} + +define <4 x half> @vfnmacc_vf_v4f16_ta(<4 x half> %a, half %b, <4 x half> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v4f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x half> poison, half %b, i32 0 + %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> %allones, i32 %evl) + %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %nega, <4 x half> %vb, <4 x half> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x half> @llvm.vp.select.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl) + ret <4 x half> %u +} + +define <4 x half> @vfnmacc_vf_v4f16_commute_ta(<4 x half> %a, half %b, <4 x half> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v4f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x half> poison, half %b, i32 0 + %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> %allones, i32 %evl) + %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %vb, <4 x half> %nega, <4 x half> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x half> @llvm.vp.select.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl) + ret <4 x half> %u +} + +declare <8 x half> @llvm.vp.fma.v8f16(<8 x half>, <8 x half>, <8 x half>, <8 x i1>, i32) +declare <8 x half> @llvm.vp.fneg.v8f16(<8 x half>, <8 x i1>, i32) +declare <8 x half> @llvm.vp.merge.v8f16(<8 x i1>, <8 x half>, <8 x half>, i32) +declare <8 x half> @llvm.vp.select.v8f16(<8 x i1>, <8 x half>, <8 x half>, i32) + +define <8 x half> @vfnmacc_vv_v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> %allones, i32 %evl) + %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %nega, <8 x half> %b, <8 x half> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl) + ret <8 x half> %u +} + +define <8 x half> @vfnmacc_vv_v8f16_unmasked(<8 x half> %a, <8 x half> %b, <8 x half> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_v8f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfnmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> %allones, i32 %evl) + %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %nega, <8 x half> %b, <8 x half> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %allones, <8 x half> %v, <8 x half> %c, i32 %evl) + ret <8 x half> %u +} + +define <8 x half> @vfnmacc_vf_v8f16(<8 x half> %a, half %b, <8 x half> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x half> poison, half %b, i32 0 + %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> %allones, i32 %evl) + %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %nega, <8 x half> %vb, <8 x half> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl) + ret <8 x half> %u +} + +define <8 x half> @vfnmacc_vf_v8f16_commute(<8 x half> %a, half %b, <8 x half> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v8f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x half> poison, half %b, i32 0 + %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> %allones, i32 %evl) + %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %vb, <8 x half> %nega, <8 x half> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl) + ret <8 x half> %u +} + +define <8 x half> @vfnmacc_vf_v8f16_unmasked(<8 x half> %a, half %b, <8 x half> %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v8f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x half> poison, half %b, i32 0 + %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> %allones, i32 %evl) + %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %nega, <8 x half> %vb, <8 x half> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %allones, <8 x half> %v, <8 x half> %c, i32 %evl) + ret <8 x half> %u +} + +define <8 x half> @vfnmacc_vv_v8f16_ta(<8 x half> %a, <8 x half> %b, <8 x half> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_v8f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> %allones, i32 %evl) + %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %nega, <8 x half> %b, <8 x half> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x half> @llvm.vp.select.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl) + ret <8 x half> %u +} + +define <8 x half> @vfnmacc_vf_v8f16_ta(<8 x half> %a, half %b, <8 x half> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v8f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x half> poison, half %b, i32 0 + %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> %allones, i32 %evl) + %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %nega, <8 x half> %vb, <8 x half> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x half> @llvm.vp.select.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl) + ret <8 x half> %u +} + +define <8 x half> @vfnmacc_vf_v8f16_commute_ta(<8 x half> %a, half %b, <8 x half> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v8f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x half> poison, half %b, i32 0 + %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> %allones, i32 %evl) + %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %vb, <8 x half> %nega, <8 x half> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x half> @llvm.vp.select.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl) + ret <8 x half> %u +} + +declare <16 x half> @llvm.vp.fma.v16f16(<16 x half>, <16 x half>, <16 x half>, <16 x i1>, i32) +declare <16 x half> @llvm.vp.fneg.v16f16(<16 x half>, <16 x i1>, i32) +declare <16 x half> @llvm.vp.merge.v16f16(<16 x i1>, <16 x half>, <16 x half>, i32) +declare <16 x half> @llvm.vp.select.v16f16(<16 x i1>, <16 x half>, <16 x half>, i32) + +define <16 x half> @vfnmacc_vv_v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_v16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfnmacc.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> %allones, i32 %evl) + %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> %allones, i32 %evl) + %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %nega, <16 x half> %b, <16 x half> %negc, <16 x i1> %allones, i32 %evl) + %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl) + ret <16 x half> %u +} + +define <16 x half> @vfnmacc_vv_v16f16_unmasked(<16 x half> %a, <16 x half> %b, <16 x half> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_v16f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfnmacc.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> %allones, i32 %evl) + %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> %allones, i32 %evl) + %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %nega, <16 x half> %b, <16 x half> %negc, <16 x i1> %allones, i32 %evl) + %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %allones, <16 x half> %v, <16 x half> %c, i32 %evl) + ret <16 x half> %u +} + +define <16 x half> @vfnmacc_vf_v16f16(<16 x half> %a, half %b, <16 x half> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfnmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x half> poison, half %b, i32 0 + %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> %allones, i32 %evl) + %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> %allones, i32 %evl) + %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %nega, <16 x half> %vb, <16 x half> %negc, <16 x i1> %allones, i32 %evl) + %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl) + ret <16 x half> %u +} + +define <16 x half> @vfnmacc_vf_v16f16_commute(<16 x half> %a, half %b, <16 x half> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v16f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfnmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x half> poison, half %b, i32 0 + %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> %allones, i32 %evl) + %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> %allones, i32 %evl) + %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %vb, <16 x half> %nega, <16 x half> %negc, <16 x i1> %allones, i32 %evl) + %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl) + ret <16 x half> %u +} + +define <16 x half> @vfnmacc_vf_v16f16_unmasked(<16 x half> %a, half %b, <16 x half> %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v16f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfnmacc.vf v10, fa0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x half> poison, half %b, i32 0 + %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> %allones, i32 %evl) + %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> %allones, i32 %evl) + %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %nega, <16 x half> %vb, <16 x half> %negc, <16 x i1> %allones, i32 %evl) + %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %allones, <16 x half> %v, <16 x half> %c, i32 %evl) + ret <16 x half> %u +} + +define <16 x half> @vfnmacc_vv_v16f16_ta(<16 x half> %a, <16 x half> %b, <16 x half> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_v16f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfnmacc.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> %allones, i32 %evl) + %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> %allones, i32 %evl) + %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %nega, <16 x half> %b, <16 x half> %negc, <16 x i1> %allones, i32 %evl) + %u = call <16 x half> @llvm.vp.select.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl) + ret <16 x half> %u +} + +define <16 x half> @vfnmacc_vf_v16f16_ta(<16 x half> %a, half %b, <16 x half> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v16f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfnmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x half> poison, half %b, i32 0 + %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> %allones, i32 %evl) + %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> %allones, i32 %evl) + %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %nega, <16 x half> %vb, <16 x half> %negc, <16 x i1> %allones, i32 %evl) + %u = call <16 x half> @llvm.vp.select.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl) + ret <16 x half> %u +} + +define <16 x half> @vfnmacc_vf_v16f16_commute_ta(<16 x half> %a, half %b, <16 x half> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v16f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfnmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x half> poison, half %b, i32 0 + %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> %allones, i32 %evl) + %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> %allones, i32 %evl) + %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %vb, <16 x half> %nega, <16 x half> %negc, <16 x i1> %allones, i32 %evl) + %u = call <16 x half> @llvm.vp.select.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl) + ret <16 x half> %u +} + +declare <32 x half> @llvm.vp.fma.v32f16(<32 x half>, <32 x half>, <32 x half>, <32 x i1>, i32) +declare <32 x half> @llvm.vp.fneg.v32f16(<32 x half>, <32 x i1>, i32) +declare <32 x half> @llvm.vp.merge.v32f16(<32 x i1>, <32 x half>, <32 x half>, i32) +declare <32 x half> @llvm.vp.select.v32f16(<32 x i1>, <32 x half>, <32 x half>, i32) + +define <32 x half> @vfnmacc_vv_v32f16(<32 x half> %a, <32 x half> %b, <32 x half> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_v32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfnmacc.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %nega = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %a, <32 x i1> %allones, i32 %evl) + %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> %allones, i32 %evl) + %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %nega, <32 x half> %b, <32 x half> %negc, <32 x i1> %allones, i32 %evl) + %u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl) + ret <32 x half> %u +} + +define <32 x half> @vfnmacc_vv_v32f16_unmasked(<32 x half> %a, <32 x half> %b, <32 x half> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_v32f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfnmacc.vv v16, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %nega = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %a, <32 x i1> %allones, i32 %evl) + %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> %allones, i32 %evl) + %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %nega, <32 x half> %b, <32 x half> %negc, <32 x i1> %allones, i32 %evl) + %u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> %allones, <32 x half> %v, <32 x half> %c, i32 %evl) + ret <32 x half> %u +} + +define <32 x half> @vfnmacc_vf_v32f16(<32 x half> %a, half %b, <32 x half> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfnmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <32 x half> poison, half %b, i32 0 + %vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %nega = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %a, <32 x i1> %allones, i32 %evl) + %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> %allones, i32 %evl) + %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %nega, <32 x half> %vb, <32 x half> %negc, <32 x i1> %allones, i32 %evl) + %u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl) + ret <32 x half> %u +} + +define <32 x half> @vfnmacc_vf_v32f16_commute(<32 x half> %a, half %b, <32 x half> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v32f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfnmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <32 x half> poison, half %b, i32 0 + %vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %nega = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %a, <32 x i1> %allones, i32 %evl) + %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> %allones, i32 %evl) + %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %vb, <32 x half> %nega, <32 x half> %negc, <32 x i1> %allones, i32 %evl) + %u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl) + ret <32 x half> %u +} + +define <32 x half> @vfnmacc_vf_v32f16_unmasked(<32 x half> %a, half %b, <32 x half> %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v32f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfnmacc.vf v12, fa0, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <32 x half> poison, half %b, i32 0 + %vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %nega = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %a, <32 x i1> %allones, i32 %evl) + %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> %allones, i32 %evl) + %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %nega, <32 x half> %vb, <32 x half> %negc, <32 x i1> %allones, i32 %evl) + %u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> %allones, <32 x half> %v, <32 x half> %c, i32 %evl) + ret <32 x half> %u +} + +define <32 x half> @vfnmacc_vv_v32f16_ta(<32 x half> %a, <32 x half> %b, <32 x half> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_v32f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfnmacc.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %nega = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %a, <32 x i1> %allones, i32 %evl) + %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> %allones, i32 %evl) + %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %nega, <32 x half> %b, <32 x half> %negc, <32 x i1> %allones, i32 %evl) + %u = call <32 x half> @llvm.vp.select.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl) + ret <32 x half> %u +} + +define <32 x half> @vfnmacc_vf_v32f16_ta(<32 x half> %a, half %b, <32 x half> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v32f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfnmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <32 x half> poison, half %b, i32 0 + %vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %nega = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %a, <32 x i1> %allones, i32 %evl) + %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> %allones, i32 %evl) + %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %nega, <32 x half> %vb, <32 x half> %negc, <32 x i1> %allones, i32 %evl) + %u = call <32 x half> @llvm.vp.select.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl) + ret <32 x half> %u +} + +define <32 x half> @vfnmacc_vf_v32f16_commute_ta(<32 x half> %a, half %b, <32 x half> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v32f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfnmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <32 x half> poison, half %b, i32 0 + %vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %nega = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %a, <32 x i1> %allones, i32 %evl) + %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> %allones, i32 %evl) + %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %vb, <32 x half> %nega, <32 x half> %negc, <32 x i1> %allones, i32 %evl) + %u = call <32 x half> @llvm.vp.select.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl) + ret <32 x half> %u +} + +declare <2 x float> @llvm.vp.fma.v2f32(<2 x float>, <2 x float>, <2 x float>, <2 x i1>, i32) +declare <2 x float> @llvm.vp.fneg.v2f32(<2 x float>, <2 x i1>, i32) +declare <2 x float> @llvm.vp.merge.v2f32(<2 x i1>, <2 x float>, <2 x float>, i32) +declare <2 x float> @llvm.vp.select.v2f32(<2 x i1>, <2 x float>, <2 x float>, i32) + +define <2 x float> @vfnmacc_vv_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_v2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> %allones, i32 %evl) + %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %nega, <2 x float> %b, <2 x float> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl) + ret <2 x float> %u +} + +define <2 x float> @vfnmacc_vv_v2f32_unmasked(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_v2f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfnmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> %allones, i32 %evl) + %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %nega, <2 x float> %b, <2 x float> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %allones, <2 x float> %v, <2 x float> %c, i32 %evl) + ret <2 x float> %u +} + +define <2 x float> @vfnmacc_vf_v2f32(<2 x float> %a, float %b, <2 x float> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x float> poison, float %b, i32 0 + %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> %allones, i32 %evl) + %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %nega, <2 x float> %vb, <2 x float> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl) + ret <2 x float> %u +} + +define <2 x float> @vfnmacc_vf_v2f32_commute(<2 x float> %a, float %b, <2 x float> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v2f32_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x float> poison, float %b, i32 0 + %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> %allones, i32 %evl) + %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %vb, <2 x float> %nega, <2 x float> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl) + ret <2 x float> %u +} + +define <2 x float> @vfnmacc_vf_v2f32_unmasked(<2 x float> %a, float %b, <2 x float> %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v2f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x float> poison, float %b, i32 0 + %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> %allones, i32 %evl) + %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %nega, <2 x float> %vb, <2 x float> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %allones, <2 x float> %v, <2 x float> %c, i32 %evl) + ret <2 x float> %u +} + +define <2 x float> @vfnmacc_vv_v2f32_ta(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_v2f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> %allones, i32 %evl) + %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %nega, <2 x float> %b, <2 x float> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x float> @llvm.vp.select.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl) + ret <2 x float> %u +} + +define <2 x float> @vfnmacc_vf_v2f32_ta(<2 x float> %a, float %b, <2 x float> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v2f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x float> poison, float %b, i32 0 + %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> %allones, i32 %evl) + %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %nega, <2 x float> %vb, <2 x float> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x float> @llvm.vp.select.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl) + ret <2 x float> %u +} + +define <2 x float> @vfnmacc_vf_v2f32_commute_ta(<2 x float> %a, float %b, <2 x float> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v2f32_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x float> poison, float %b, i32 0 + %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> %allones, i32 %evl) + %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %vb, <2 x float> %nega, <2 x float> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x float> @llvm.vp.select.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl) + ret <2 x float> %u +} + +declare <4 x float> @llvm.vp.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, <4 x i1>, i32) +declare <4 x float> @llvm.vp.fneg.v4f32(<4 x float>, <4 x i1>, i32) +declare <4 x float> @llvm.vp.merge.v4f32(<4 x i1>, <4 x float>, <4 x float>, i32) +declare <4 x float> @llvm.vp.select.v4f32(<4 x i1>, <4 x float>, <4 x float>, i32) + +define <4 x float> @vfnmacc_vv_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> %allones, i32 %evl) + %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %nega, <4 x float> %b, <4 x float> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl) + ret <4 x float> %u +} + +define <4 x float> @vfnmacc_vv_v4f32_unmasked(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_v4f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfnmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> %allones, i32 %evl) + %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %nega, <4 x float> %b, <4 x float> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %allones, <4 x float> %v, <4 x float> %c, i32 %evl) + ret <4 x float> %u +} + +define <4 x float> @vfnmacc_vf_v4f32(<4 x float> %a, float %b, <4 x float> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x float> poison, float %b, i32 0 + %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> %allones, i32 %evl) + %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %nega, <4 x float> %vb, <4 x float> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl) + ret <4 x float> %u +} + +define <4 x float> @vfnmacc_vf_v4f32_commute(<4 x float> %a, float %b, <4 x float> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v4f32_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x float> poison, float %b, i32 0 + %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> %allones, i32 %evl) + %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %vb, <4 x float> %nega, <4 x float> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl) + ret <4 x float> %u +} + +define <4 x float> @vfnmacc_vf_v4f32_unmasked(<4 x float> %a, float %b, <4 x float> %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v4f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x float> poison, float %b, i32 0 + %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> %allones, i32 %evl) + %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %nega, <4 x float> %vb, <4 x float> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %allones, <4 x float> %v, <4 x float> %c, i32 %evl) + ret <4 x float> %u +} + +define <4 x float> @vfnmacc_vv_v4f32_ta(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_v4f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> %allones, i32 %evl) + %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %nega, <4 x float> %b, <4 x float> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x float> @llvm.vp.select.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl) + ret <4 x float> %u +} + +define <4 x float> @vfnmacc_vf_v4f32_ta(<4 x float> %a, float %b, <4 x float> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v4f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x float> poison, float %b, i32 0 + %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> %allones, i32 %evl) + %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %nega, <4 x float> %vb, <4 x float> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x float> @llvm.vp.select.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl) + ret <4 x float> %u +} + +define <4 x float> @vfnmacc_vf_v4f32_commute_ta(<4 x float> %a, float %b, <4 x float> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v4f32_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x float> poison, float %b, i32 0 + %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> %allones, i32 %evl) + %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %vb, <4 x float> %nega, <4 x float> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x float> @llvm.vp.select.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl) + ret <4 x float> %u +} + +declare <8 x float> @llvm.vp.fma.v8f32(<8 x float>, <8 x float>, <8 x float>, <8 x i1>, i32) +declare <8 x float> @llvm.vp.fneg.v8f32(<8 x float>, <8 x i1>, i32) +declare <8 x float> @llvm.vp.merge.v8f32(<8 x i1>, <8 x float>, <8 x float>, i32) +declare <8 x float> @llvm.vp.select.v8f32(<8 x i1>, <8 x float>, <8 x float>, i32) + +define <8 x float> @vfnmacc_vv_v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfnmacc.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> %allones, i32 %evl) + %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %nega, <8 x float> %b, <8 x float> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl) + ret <8 x float> %u +} + +define <8 x float> @vfnmacc_vv_v8f32_unmasked(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_v8f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfnmacc.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> %allones, i32 %evl) + %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %nega, <8 x float> %b, <8 x float> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %allones, <8 x float> %v, <8 x float> %c, i32 %evl) + ret <8 x float> %u +} + +define <8 x float> @vfnmacc_vf_v8f32(<8 x float> %a, float %b, <8 x float> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfnmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x float> poison, float %b, i32 0 + %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> %allones, i32 %evl) + %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %nega, <8 x float> %vb, <8 x float> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl) + ret <8 x float> %u +} + +define <8 x float> @vfnmacc_vf_v8f32_commute(<8 x float> %a, float %b, <8 x float> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v8f32_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfnmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x float> poison, float %b, i32 0 + %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> %allones, i32 %evl) + %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %vb, <8 x float> %nega, <8 x float> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl) + ret <8 x float> %u +} + +define <8 x float> @vfnmacc_vf_v8f32_unmasked(<8 x float> %a, float %b, <8 x float> %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v8f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfnmacc.vf v10, fa0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x float> poison, float %b, i32 0 + %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> %allones, i32 %evl) + %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %nega, <8 x float> %vb, <8 x float> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %allones, <8 x float> %v, <8 x float> %c, i32 %evl) + ret <8 x float> %u +} + +define <8 x float> @vfnmacc_vv_v8f32_ta(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_v8f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfnmacc.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> %allones, i32 %evl) + %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %nega, <8 x float> %b, <8 x float> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x float> @llvm.vp.select.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl) + ret <8 x float> %u +} + +define <8 x float> @vfnmacc_vf_v8f32_ta(<8 x float> %a, float %b, <8 x float> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v8f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfnmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x float> poison, float %b, i32 0 + %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> %allones, i32 %evl) + %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %nega, <8 x float> %vb, <8 x float> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x float> @llvm.vp.select.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl) + ret <8 x float> %u +} + +define <8 x float> @vfnmacc_vf_v8f32_commute_ta(<8 x float> %a, float %b, <8 x float> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v8f32_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfnmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x float> poison, float %b, i32 0 + %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> %allones, i32 %evl) + %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %vb, <8 x float> %nega, <8 x float> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x float> @llvm.vp.select.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl) + ret <8 x float> %u +} + +declare <16 x float> @llvm.vp.fma.v16f32(<16 x float>, <16 x float>, <16 x float>, <16 x i1>, i32) +declare <16 x float> @llvm.vp.fneg.v16f32(<16 x float>, <16 x i1>, i32) +declare <16 x float> @llvm.vp.merge.v16f32(<16 x i1>, <16 x float>, <16 x float>, i32) +declare <16 x float> @llvm.vp.select.v16f32(<16 x i1>, <16 x float>, <16 x float>, i32) + +define <16 x float> @vfnmacc_vv_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_v16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfnmacc.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> %allones, i32 %evl) + %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> %allones, i32 %evl) + %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %nega, <16 x float> %b, <16 x float> %negc, <16 x i1> %allones, i32 %evl) + %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl) + ret <16 x float> %u +} + +define <16 x float> @vfnmacc_vv_v16f32_unmasked(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_v16f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfnmacc.vv v16, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> %allones, i32 %evl) + %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> %allones, i32 %evl) + %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %nega, <16 x float> %b, <16 x float> %negc, <16 x i1> %allones, i32 %evl) + %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %allones, <16 x float> %v, <16 x float> %c, i32 %evl) + ret <16 x float> %u +} + +define <16 x float> @vfnmacc_vf_v16f32(<16 x float> %a, float %b, <16 x float> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfnmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x float> poison, float %b, i32 0 + %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> %allones, i32 %evl) + %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> %allones, i32 %evl) + %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %nega, <16 x float> %vb, <16 x float> %negc, <16 x i1> %allones, i32 %evl) + %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl) + ret <16 x float> %u +} + +define <16 x float> @vfnmacc_vf_v16f32_commute(<16 x float> %a, float %b, <16 x float> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v16f32_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfnmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x float> poison, float %b, i32 0 + %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> %allones, i32 %evl) + %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> %allones, i32 %evl) + %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %vb, <16 x float> %nega, <16 x float> %negc, <16 x i1> %allones, i32 %evl) + %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl) + ret <16 x float> %u +} + +define <16 x float> @vfnmacc_vf_v16f32_unmasked(<16 x float> %a, float %b, <16 x float> %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v16f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfnmacc.vf v12, fa0, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x float> poison, float %b, i32 0 + %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> %allones, i32 %evl) + %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> %allones, i32 %evl) + %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %nega, <16 x float> %vb, <16 x float> %negc, <16 x i1> %allones, i32 %evl) + %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %allones, <16 x float> %v, <16 x float> %c, i32 %evl) + ret <16 x float> %u +} + +define <16 x float> @vfnmacc_vv_v16f32_ta(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_v16f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfnmacc.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> %allones, i32 %evl) + %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> %allones, i32 %evl) + %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %nega, <16 x float> %b, <16 x float> %negc, <16 x i1> %allones, i32 %evl) + %u = call <16 x float> @llvm.vp.select.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl) + ret <16 x float> %u +} + +define <16 x float> @vfnmacc_vf_v16f32_ta(<16 x float> %a, float %b, <16 x float> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v16f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfnmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x float> poison, float %b, i32 0 + %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> %allones, i32 %evl) + %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> %allones, i32 %evl) + %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %nega, <16 x float> %vb, <16 x float> %negc, <16 x i1> %allones, i32 %evl) + %u = call <16 x float> @llvm.vp.select.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl) + ret <16 x float> %u +} + +define <16 x float> @vfnmacc_vf_v16f32_commute_ta(<16 x float> %a, float %b, <16 x float> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v16f32_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfnmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x float> poison, float %b, i32 0 + %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> %allones, i32 %evl) + %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> %allones, i32 %evl) + %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %vb, <16 x float> %nega, <16 x float> %negc, <16 x i1> %allones, i32 %evl) + %u = call <16 x float> @llvm.vp.select.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl) + ret <16 x float> %u +} + +declare <2 x double> @llvm.vp.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, <2 x i1>, i32) +declare <2 x double> @llvm.vp.fneg.v2f64(<2 x double>, <2 x i1>, i32) +declare <2 x double> @llvm.vp.merge.v2f64(<2 x i1>, <2 x double>, <2 x double>, i32) +declare <2 x double> @llvm.vp.select.v2f64(<2 x i1>, <2 x double>, <2 x double>, i32) + +define <2 x double> @vfnmacc_vv_v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> %allones, i32 %evl) + %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %nega, <2 x double> %b, <2 x double> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl) + ret <2 x double> %u +} + +define <2 x double> @vfnmacc_vv_v2f64_unmasked(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_v2f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfnmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> %allones, i32 %evl) + %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %nega, <2 x double> %b, <2 x double> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %allones, <2 x double> %v, <2 x double> %c, i32 %evl) + ret <2 x double> %u +} + +define <2 x double> @vfnmacc_vf_v2f64(<2 x double> %a, double %b, <2 x double> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x double> poison, double %b, i32 0 + %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> %allones, i32 %evl) + %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %nega, <2 x double> %vb, <2 x double> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl) + ret <2 x double> %u +} + +define <2 x double> @vfnmacc_vf_v2f64_commute(<2 x double> %a, double %b, <2 x double> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v2f64_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x double> poison, double %b, i32 0 + %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> %allones, i32 %evl) + %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %vb, <2 x double> %nega, <2 x double> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl) + ret <2 x double> %u +} + +define <2 x double> @vfnmacc_vf_v2f64_unmasked(<2 x double> %a, double %b, <2 x double> %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v2f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x double> poison, double %b, i32 0 + %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> %allones, i32 %evl) + %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %nega, <2 x double> %vb, <2 x double> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %allones, <2 x double> %v, <2 x double> %c, i32 %evl) + ret <2 x double> %u +} + +define <2 x double> @vfnmacc_vv_v2f64_ta(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_v2f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> %allones, i32 %evl) + %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %nega, <2 x double> %b, <2 x double> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x double> @llvm.vp.select.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl) + ret <2 x double> %u +} + +define <2 x double> @vfnmacc_vf_v2f64_ta(<2 x double> %a, double %b, <2 x double> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v2f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x double> poison, double %b, i32 0 + %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> %allones, i32 %evl) + %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %nega, <2 x double> %vb, <2 x double> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x double> @llvm.vp.select.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl) + ret <2 x double> %u +} + +define <2 x double> @vfnmacc_vf_v2f64_commute_ta(<2 x double> %a, double %b, <2 x double> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v2f64_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x double> poison, double %b, i32 0 + %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> %allones, i32 %evl) + %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %vb, <2 x double> %nega, <2 x double> %negc, <2 x i1> %allones, i32 %evl) + %u = call <2 x double> @llvm.vp.select.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl) + ret <2 x double> %u +} + +declare <4 x double> @llvm.vp.fma.v4f64(<4 x double>, <4 x double>, <4 x double>, <4 x i1>, i32) +declare <4 x double> @llvm.vp.fneg.v4f64(<4 x double>, <4 x i1>, i32) +declare <4 x double> @llvm.vp.merge.v4f64(<4 x i1>, <4 x double>, <4 x double>, i32) +declare <4 x double> @llvm.vp.select.v4f64(<4 x i1>, <4 x double>, <4 x double>, i32) + +define <4 x double> @vfnmacc_vv_v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfnmacc.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> %allones, i32 %evl) + %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %nega, <4 x double> %b, <4 x double> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl) + ret <4 x double> %u +} + +define <4 x double> @vfnmacc_vv_v4f64_unmasked(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_v4f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfnmacc.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> %allones, i32 %evl) + %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %nega, <4 x double> %b, <4 x double> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %allones, <4 x double> %v, <4 x double> %c, i32 %evl) + ret <4 x double> %u +} + +define <4 x double> @vfnmacc_vf_v4f64(<4 x double> %a, double %b, <4 x double> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfnmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x double> poison, double %b, i32 0 + %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> %allones, i32 %evl) + %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %nega, <4 x double> %vb, <4 x double> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl) + ret <4 x double> %u +} + +define <4 x double> @vfnmacc_vf_v4f64_commute(<4 x double> %a, double %b, <4 x double> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v4f64_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfnmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x double> poison, double %b, i32 0 + %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> %allones, i32 %evl) + %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %vb, <4 x double> %nega, <4 x double> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl) + ret <4 x double> %u +} + +define <4 x double> @vfnmacc_vf_v4f64_unmasked(<4 x double> %a, double %b, <4 x double> %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v4f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfnmacc.vf v10, fa0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x double> poison, double %b, i32 0 + %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> %allones, i32 %evl) + %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %nega, <4 x double> %vb, <4 x double> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %allones, <4 x double> %v, <4 x double> %c, i32 %evl) + ret <4 x double> %u +} + +define <4 x double> @vfnmacc_vv_v4f64_ta(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_v4f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfnmacc.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> %allones, i32 %evl) + %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %nega, <4 x double> %b, <4 x double> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x double> @llvm.vp.select.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl) + ret <4 x double> %u +} + +define <4 x double> @vfnmacc_vf_v4f64_ta(<4 x double> %a, double %b, <4 x double> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v4f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfnmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x double> poison, double %b, i32 0 + %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> %allones, i32 %evl) + %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %nega, <4 x double> %vb, <4 x double> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x double> @llvm.vp.select.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl) + ret <4 x double> %u +} + +define <4 x double> @vfnmacc_vf_v4f64_commute_ta(<4 x double> %a, double %b, <4 x double> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v4f64_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfnmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x double> poison, double %b, i32 0 + %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> %allones, i32 %evl) + %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %vb, <4 x double> %nega, <4 x double> %negc, <4 x i1> %allones, i32 %evl) + %u = call <4 x double> @llvm.vp.select.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl) + ret <4 x double> %u +} + +declare <8 x double> @llvm.vp.fma.v8f64(<8 x double>, <8 x double>, <8 x double>, <8 x i1>, i32) +declare <8 x double> @llvm.vp.fneg.v8f64(<8 x double>, <8 x i1>, i32) +declare <8 x double> @llvm.vp.merge.v8f64(<8 x i1>, <8 x double>, <8 x double>, i32) +declare <8 x double> @llvm.vp.select.v8f64(<8 x i1>, <8 x double>, <8 x double>, i32) + +define <8 x double> @vfnmacc_vv_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfnmacc.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> %allones, i32 %evl) + %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %nega, <8 x double> %b, <8 x double> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl) + ret <8 x double> %u +} + +define <8 x double> @vfnmacc_vv_v8f64_unmasked(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_v8f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfnmacc.vv v16, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> %allones, i32 %evl) + %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %nega, <8 x double> %b, <8 x double> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %allones, <8 x double> %v, <8 x double> %c, i32 %evl) + ret <8 x double> %u +} + +define <8 x double> @vfnmacc_vf_v8f64(<8 x double> %a, double %b, <8 x double> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfnmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x double> poison, double %b, i32 0 + %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> %allones, i32 %evl) + %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %nega, <8 x double> %vb, <8 x double> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl) + ret <8 x double> %u +} + +define <8 x double> @vfnmacc_vf_v8f64_commute(<8 x double> %a, double %b, <8 x double> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v8f64_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfnmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x double> poison, double %b, i32 0 + %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> %allones, i32 %evl) + %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %vb, <8 x double> %nega, <8 x double> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl) + ret <8 x double> %u +} + +define <8 x double> @vfnmacc_vf_v8f64_unmasked(<8 x double> %a, double %b, <8 x double> %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v8f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfnmacc.vf v12, fa0, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x double> poison, double %b, i32 0 + %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> %allones, i32 %evl) + %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %nega, <8 x double> %vb, <8 x double> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %allones, <8 x double> %v, <8 x double> %c, i32 %evl) + ret <8 x double> %u +} + +define <8 x double> @vfnmacc_vv_v8f64_ta(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_v8f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfnmacc.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> %allones, i32 %evl) + %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %nega, <8 x double> %b, <8 x double> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x double> @llvm.vp.select.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl) + ret <8 x double> %u +} + +define <8 x double> @vfnmacc_vf_v8f64_ta(<8 x double> %a, double %b, <8 x double> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v8f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfnmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x double> poison, double %b, i32 0 + %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> %allones, i32 %evl) + %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %nega, <8 x double> %vb, <8 x double> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x double> @llvm.vp.select.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl) + ret <8 x double> %u +} + +define <8 x double> @vfnmacc_vf_v8f64_commute_ta(<8 x double> %a, double %b, <8 x double> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_v8f64_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfnmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x double> poison, double %b, i32 0 + %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> %allones, i32 %evl) + %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %vb, <8 x double> %nega, <8 x double> %negc, <8 x i1> %allones, i32 %evl) + %u = call <8 x double> @llvm.vp.select.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl) + ret <8 x double> %u +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfnmsac-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfnmsac-vp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfnmsac-vp.ll @@ -0,0 +1,2039 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s + +declare @llvm.vp.fma.nxv1f16(, , , , i32) +declare @llvm.vp.fneg.nxv1f16(, , i32) +declare @llvm.vp.merge.nxv1f16(, , , i32) +declare @llvm.vp.select.nxv1f16(, , , i32) + +define @vfnmsac_vv_nxv1f16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv1f16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv1f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfnmsac.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv1f16( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv1f16_commute( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv1f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv1f16_unmasked( %a, half %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv1f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv1f16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv1f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv1f16_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv1f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv1f16_commute_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv1f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f16( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv2f16(, , , , i32) +declare @llvm.vp.fneg.nxv2f16(, , i32) +declare @llvm.vp.merge.nxv2f16(, , , i32) +declare @llvm.vp.select.nxv2f16(, , , i32) + +define @vfnmsac_vv_nxv2f16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv2f16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv2f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfnmsac.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv2f16( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv2f16_commute( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv2f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv2f16_unmasked( %a, half %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv2f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv2f16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv2f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv2f16_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv2f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv2f16_commute_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv2f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f16( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv4f16(, , , , i32) +declare @llvm.vp.fneg.nxv4f16(, , i32) +declare @llvm.vp.merge.nxv4f16(, , , i32) +declare @llvm.vp.select.nxv4f16(, , , i32) + +define @vfnmsac_vv_nxv4f16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv4f16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv4f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfnmsac.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv4f16( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv4f16_commute( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv4f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv4f16_unmasked( %a, half %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv4f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv4f16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv4f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv4f16_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv4f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv4f16_commute_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv4f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f16( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv8f16(, , , , i32) +declare @llvm.vp.fneg.nxv8f16(, , i32) +declare @llvm.vp.merge.nxv8f16(, , , i32) +declare @llvm.vp.select.nxv8f16(, , , i32) + +define @vfnmsac_vv_nxv8f16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfnmsac.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv8f16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv8f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfnmsac.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv8f16( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfnmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv8f16_commute( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv8f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfnmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv8f16_unmasked( %a, half %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv8f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfnmsac.vf v10, fa0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv8f16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv8f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfnmsac.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv8f16_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv8f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfnmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv8f16_commute_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv8f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfnmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f16( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv16f16(, , , , i32) +declare @llvm.vp.fneg.nxv16f16(, , i32) +declare @llvm.vp.merge.nxv16f16(, , , i32) +declare @llvm.vp.select.nxv16f16(, , , i32) + +define @vfnmsac_vv_nxv16f16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfnmsac.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv16f16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv16f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfnmsac.vv v16, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv16f16( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfnmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv16f16_commute( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv16f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfnmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv16f16_unmasked( %a, half %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv16f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfnmsac.vf v12, fa0, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv16f16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv16f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfnmsac.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv16f16_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv16f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfnmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv16f16_commute_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv16f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfnmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16f16( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv32f16(, , , , i32) +declare @llvm.vp.fneg.nxv32f16(, , i32) +declare @llvm.vp.merge.nxv32f16(, , , i32) +declare @llvm.vp.select.nxv32f16(, , , i32) + +define @vfnmsac_vv_nxv32f16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re16.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, mu +; CHECK-NEXT: vfnmsac.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv32f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv32f16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv32f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re16.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, mu +; CHECK-NEXT: vfnmsac.vv v24, v8, v16 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv32f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv32f16( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv32f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv32f16_commute( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv32f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv32f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv32f16_unmasked( %a, half %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv32f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv32f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv32f16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv32f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re16.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu +; CHECK-NEXT: vfnmsac.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv32f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv32f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv32f16_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv32f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv32f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv32f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv32f16_commute_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv32f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv32f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv32f16( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv1f32(, , , , i32) +declare @llvm.vp.fneg.nxv1f32(, , i32) +declare @llvm.vp.merge.nxv1f32(, , , i32) +declare @llvm.vp.select.nxv1f32(, , , i32) + +define @vfnmsac_vv_nxv1f32( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv1f32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv1f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfnmsac.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv1f32( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv1f32_commute( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv1f32_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv1f32_unmasked( %a, float %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv1f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv1f32_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv1f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv1f32_ta( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv1f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv1f32_commute_ta( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv1f32_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f32( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv2f32(, , , , i32) +declare @llvm.vp.fneg.nxv2f32(, , i32) +declare @llvm.vp.merge.nxv2f32(, , , i32) +declare @llvm.vp.select.nxv2f32(, , , i32) + +define @vfnmsac_vv_nxv2f32( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv2f32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv2f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfnmsac.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv2f32( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv2f32_commute( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv2f32_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv2f32_unmasked( %a, float %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv2f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv2f32_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv2f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv2f32_ta( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv2f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv2f32_commute_ta( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv2f32_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f32( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv4f32(, , , , i32) +declare @llvm.vp.fneg.nxv4f32(, , i32) +declare @llvm.vp.merge.nxv4f32(, , , i32) +declare @llvm.vp.select.nxv4f32(, , , i32) + +define @vfnmsac_vv_nxv4f32( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfnmsac.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv4f32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv4f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfnmsac.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv4f32( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfnmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv4f32_commute( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv4f32_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfnmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv4f32_unmasked( %a, float %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv4f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfnmsac.vf v10, fa0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv4f32_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv4f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfnmsac.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv4f32_ta( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv4f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfnmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv4f32_commute_ta( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv4f32_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfnmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f32( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv8f32(, , , , i32) +declare @llvm.vp.fneg.nxv8f32(, , i32) +declare @llvm.vp.merge.nxv8f32(, , , i32) +declare @llvm.vp.select.nxv8f32(, , , i32) + +define @vfnmsac_vv_nxv8f32( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfnmsac.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv8f32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv8f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfnmsac.vv v16, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv8f32( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfnmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv8f32_commute( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv8f32_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfnmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv8f32_unmasked( %a, float %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv8f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfnmsac.vf v12, fa0, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv8f32_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv8f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfnmsac.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv8f32_ta( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv8f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfnmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv8f32_commute_ta( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv8f32_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfnmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f32( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv16f32(, , , , i32) +declare @llvm.vp.fneg.nxv16f32(, , i32) +declare @llvm.vp.merge.nxv16f32(, , , i32) +declare @llvm.vp.select.nxv16f32(, , , i32) + +define @vfnmsac_vv_nxv16f32( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re32.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, mu +; CHECK-NEXT: vfnmsac.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv16f32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv16f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re32.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, mu +; CHECK-NEXT: vfnmsac.vv v24, v8, v16 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv16f32( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv16f32_commute( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv16f32_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv16f32_unmasked( %a, float %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv16f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv16f32_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv16f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re32.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT: vfnmsac.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv16f32_ta( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv16f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv16f32_commute_ta( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv16f32_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16f32( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv1f64(, , , , i32) +declare @llvm.vp.fneg.nxv1f64(, , i32) +declare @llvm.vp.merge.nxv1f64(, , , i32) +declare @llvm.vp.select.nxv1f64(, , , i32) + +define @vfnmsac_vv_nxv1f64( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv1f64_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv1f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfnmsac.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f64( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv1f64( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv1f64_commute( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv1f64_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv1f64_unmasked( %a, double %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv1f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f64( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv1f64_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv1f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv1f64_ta( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv1f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv1f64_commute_ta( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv1f64_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f64( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv2f64(, , , , i32) +declare @llvm.vp.fneg.nxv2f64(, , i32) +declare @llvm.vp.merge.nxv2f64(, , , i32) +declare @llvm.vp.select.nxv2f64(, , , i32) + +define @vfnmsac_vv_nxv2f64( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfnmsac.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv2f64_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv2f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfnmsac.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f64( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv2f64( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfnmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv2f64_commute( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv2f64_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfnmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv2f64_unmasked( %a, double %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv2f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfnmsac.vf v10, fa0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f64( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv2f64_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv2f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfnmsac.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv2f64_ta( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv2f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfnmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv2f64_commute_ta( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv2f64_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfnmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f64( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv4f64(, , , , i32) +declare @llvm.vp.fneg.nxv4f64(, , i32) +declare @llvm.vp.merge.nxv4f64(, , , i32) +declare @llvm.vp.select.nxv4f64(, , , i32) + +define @vfnmsac_vv_nxv4f64( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfnmsac.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv4f64_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv4f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfnmsac.vv v16, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f64( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv4f64( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfnmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv4f64_commute( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv4f64_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfnmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv4f64_unmasked( %a, double %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv4f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfnmsac.vf v12, fa0, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f64( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv4f64_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv4f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfnmsac.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv4f64_ta( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv4f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfnmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv4f64_commute_ta( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv4f64_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfnmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f64( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv8f64(, , , , i32) +declare @llvm.vp.fneg.nxv8f64(, , i32) +declare @llvm.vp.merge.nxv8f64(, , , i32) +declare @llvm.vp.select.nxv8f64(, , , i32) + +define @vfnmsac_vv_nxv8f64( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, mu +; CHECK-NEXT: vfnmsac.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv8f64_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv8f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, mu +; CHECK-NEXT: vfnmsac.vv v24, v8, v16 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f64( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv8f64( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv8f64_commute( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv8f64_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv8f64_unmasked( %a, double %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv8f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f64( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv8f64_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv8f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; CHECK-NEXT: vfnmsac.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv8f64_ta( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv8f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv8f64_commute_ta( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv8f64_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f64( %m, %v, %c, i32 %evl) + ret %u +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmacc-vp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfmacc-vp.ll @@ -0,0 +1,1919 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s + +declare @llvm.vp.fma.nxv1f16(, , , , i32) +declare @llvm.vp.fneg.nxv1f16(, , i32) +declare @llvm.vp.merge.nxv1f16(, , , i32) +declare @llvm.vp.select.nxv1f16(, , , i32) + +define @vfmacc_vv_nxv1f16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv1f16( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vv_nxv1f16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv1f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv1f16( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv1f16( %va, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv1f16( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv1f16_commute( %va, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv1f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv1f16( %vb, %va, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv1f16_unmasked( %va, half %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv1f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv1f16( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vv_nxv1f16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv1f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv1f16( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv1f16_ta( %va, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv1f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv1f16( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv1f16_commute_ta( %va, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv1f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv1f16( %vb, %va, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f16( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv2f16(, , , , i32) +declare @llvm.vp.fneg.nxv2f16(, , i32) +declare @llvm.vp.merge.nxv2f16(, , , i32) +declare @llvm.vp.select.nxv2f16(, , , i32) + +define @vfmacc_vv_nxv2f16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv2f16( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vv_nxv2f16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv2f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv2f16( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv2f16( %va, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv2f16( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv2f16_commute( %va, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv2f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv2f16( %vb, %va, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv2f16_unmasked( %va, half %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv2f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv2f16( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vv_nxv2f16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv2f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv2f16( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv2f16_ta( %va, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv2f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv2f16( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv2f16_commute_ta( %va, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv2f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv2f16( %vb, %va, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f16( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv4f16(, , , , i32) +declare @llvm.vp.fneg.nxv4f16(, , i32) +declare @llvm.vp.merge.nxv4f16(, , , i32) +declare @llvm.vp.select.nxv4f16(, , , i32) + +define @vfmacc_vv_nxv4f16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv4f16( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vv_nxv4f16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv4f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv4f16( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv4f16( %va, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv4f16( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv4f16_commute( %va, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv4f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv4f16( %vb, %va, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv4f16_unmasked( %va, half %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv4f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv4f16( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vv_nxv4f16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv4f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv4f16( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv4f16_ta( %va, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv4f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv4f16( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv4f16_commute_ta( %va, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv4f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv4f16( %vb, %va, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f16( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv8f16(, , , , i32) +declare @llvm.vp.fneg.nxv8f16(, , i32) +declare @llvm.vp.merge.nxv8f16(, , , i32) +declare @llvm.vp.select.nxv8f16(, , , i32) + +define @vfmacc_vv_nxv8f16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfmacc.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv8f16( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vv_nxv8f16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv8f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfmacc.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv8f16( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv8f16( %va, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv8f16( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv8f16_commute( %va, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv8f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv8f16( %vb, %va, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv8f16_unmasked( %va, half %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv8f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfmacc.vf v10, fa0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv8f16( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vv_nxv8f16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv8f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfmacc.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv8f16( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv8f16_ta( %va, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv8f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv8f16( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv8f16_commute_ta( %va, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv8f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv8f16( %vb, %va, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f16( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv16f16(, , , , i32) +declare @llvm.vp.fneg.nxv16f16(, , i32) +declare @llvm.vp.merge.nxv16f16(, , , i32) +declare @llvm.vp.select.nxv16f16(, , , i32) + +define @vfmacc_vv_nxv16f16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfmacc.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv16f16( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vv_nxv16f16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv16f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfmacc.vv v16, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv16f16( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv16f16( %va, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv16f16( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv16f16_commute( %va, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv16f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv16f16( %vb, %va, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv16f16_unmasked( %va, half %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv16f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfmacc.vf v12, fa0, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv16f16( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vv_nxv16f16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv16f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfmacc.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv16f16( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv16f16_ta( %va, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv16f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv16f16( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv16f16_commute_ta( %va, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv16f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv16f16( %vb, %va, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16f16( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv32f16(, , , , i32) +declare @llvm.vp.fneg.nxv32f16(, , i32) +declare @llvm.vp.merge.nxv32f16(, , , i32) +declare @llvm.vp.select.nxv32f16(, , , i32) + +define @vfmacc_vv_nxv32f16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re16.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, mu +; CHECK-NEXT: vfmacc.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv32f16( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vv_nxv32f16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv32f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re16.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, mu +; CHECK-NEXT: vfmacc.vv v24, v8, v16 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv32f16( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv32f16( %va, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu +; CHECK-NEXT: vfmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv32f16( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv32f16_commute( %va, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv32f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu +; CHECK-NEXT: vfmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv32f16( %vb, %va, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv32f16_unmasked( %va, half %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv32f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu +; CHECK-NEXT: vfmacc.vf v16, fa0, v8 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv32f16( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vv_nxv32f16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv32f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re16.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu +; CHECK-NEXT: vfmacc.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv32f16( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv32f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv32f16_ta( %va, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv32f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv32f16( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv32f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv32f16_commute_ta( %va, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv32f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv32f16( %vb, %va, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv32f16( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv1f32(, , , , i32) +declare @llvm.vp.fneg.nxv1f32(, , i32) +declare @llvm.vp.merge.nxv1f32(, , , i32) +declare @llvm.vp.select.nxv1f32(, , , i32) + +define @vfmacc_vv_nxv1f32( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv1f32( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vv_nxv1f32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv1f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv1f32( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv1f32( %va, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv1f32( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv1f32_commute( %va, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv1f32_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv1f32( %vb, %va, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv1f32_unmasked( %va, float %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv1f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv1f32( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vv_nxv1f32_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv1f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv1f32( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv1f32_ta( %va, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv1f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv1f32( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv1f32_commute_ta( %va, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv1f32_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv1f32( %vb, %va, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f32( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv2f32(, , , , i32) +declare @llvm.vp.fneg.nxv2f32(, , i32) +declare @llvm.vp.merge.nxv2f32(, , , i32) +declare @llvm.vp.select.nxv2f32(, , , i32) + +define @vfmacc_vv_nxv2f32( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv2f32( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vv_nxv2f32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv2f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv2f32( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv2f32( %va, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv2f32( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv2f32_commute( %va, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv2f32_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv2f32( %vb, %va, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv2f32_unmasked( %va, float %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv2f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv2f32( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vv_nxv2f32_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv2f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv2f32( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv2f32_ta( %va, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv2f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv2f32( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv2f32_commute_ta( %va, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv2f32_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv2f32( %vb, %va, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f32( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv4f32(, , , , i32) +declare @llvm.vp.fneg.nxv4f32(, , i32) +declare @llvm.vp.merge.nxv4f32(, , , i32) +declare @llvm.vp.select.nxv4f32(, , , i32) + +define @vfmacc_vv_nxv4f32( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfmacc.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv4f32( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vv_nxv4f32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv4f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfmacc.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv4f32( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv4f32( %va, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv4f32( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv4f32_commute( %va, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv4f32_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv4f32( %vb, %va, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv4f32_unmasked( %va, float %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv4f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfmacc.vf v10, fa0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv4f32( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vv_nxv4f32_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv4f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfmacc.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv4f32( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv4f32_ta( %va, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv4f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv4f32( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv4f32_commute_ta( %va, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv4f32_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv4f32( %vb, %va, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f32( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv8f32(, , , , i32) +declare @llvm.vp.fneg.nxv8f32(, , i32) +declare @llvm.vp.merge.nxv8f32(, , , i32) +declare @llvm.vp.select.nxv8f32(, , , i32) + +define @vfmacc_vv_nxv8f32( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfmacc.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv8f32( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vv_nxv8f32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv8f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfmacc.vv v16, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv8f32( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv8f32( %va, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv8f32( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv8f32_commute( %va, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv8f32_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv8f32( %vb, %va, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv8f32_unmasked( %va, float %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv8f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfmacc.vf v12, fa0, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv8f32( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vv_nxv8f32_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv8f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfmacc.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv8f32( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv8f32_ta( %va, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv8f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv8f32( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv8f32_commute_ta( %va, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv8f32_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv8f32( %vb, %va, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f32( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv16f32(, , , , i32) +declare @llvm.vp.fneg.nxv16f32(, , i32) +declare @llvm.vp.merge.nxv16f32(, , , i32) +declare @llvm.vp.select.nxv16f32(, , , i32) + +define @vfmacc_vv_nxv16f32( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re32.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, mu +; CHECK-NEXT: vfmacc.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv16f32( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vv_nxv16f32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv16f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re32.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, mu +; CHECK-NEXT: vfmacc.vv v24, v8, v16 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv16f32( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv16f32( %va, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu +; CHECK-NEXT: vfmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv16f32( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv16f32_commute( %va, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv16f32_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu +; CHECK-NEXT: vfmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv16f32( %vb, %va, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv16f32_unmasked( %va, float %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv16f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu +; CHECK-NEXT: vfmacc.vf v16, fa0, v8 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv16f32( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vv_nxv16f32_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv16f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re32.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT: vfmacc.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv16f32( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv16f32_ta( %va, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv16f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv16f32( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv16f32_commute_ta( %va, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv16f32_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv16f32( %vb, %va, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16f32( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv1f64(, , , , i32) +declare @llvm.vp.fneg.nxv1f64(, , i32) +declare @llvm.vp.merge.nxv1f64(, , , i32) +declare @llvm.vp.select.nxv1f64(, , , i32) + +define @vfmacc_vv_nxv1f64( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv1f64( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vv_nxv1f64_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv1f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv1f64( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f64( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv1f64( %va, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv1f64( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv1f64_commute( %va, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv1f64_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv1f64( %vb, %va, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv1f64_unmasked( %va, double %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv1f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv1f64( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f64( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vv_nxv1f64_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv1f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv1f64( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv1f64_ta( %va, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv1f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv1f64( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv1f64_commute_ta( %va, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv1f64_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv1f64( %vb, %va, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f64( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv2f64(, , , , i32) +declare @llvm.vp.fneg.nxv2f64(, , i32) +declare @llvm.vp.merge.nxv2f64(, , , i32) +declare @llvm.vp.select.nxv2f64(, , , i32) + +define @vfmacc_vv_nxv2f64( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfmacc.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv2f64( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vv_nxv2f64_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv2f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfmacc.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv2f64( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f64( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv2f64( %va, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv2f64( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv2f64_commute( %va, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv2f64_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv2f64( %vb, %va, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv2f64_unmasked( %va, double %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv2f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfmacc.vf v10, fa0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv2f64( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f64( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vv_nxv2f64_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv2f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfmacc.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv2f64( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv2f64_ta( %va, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv2f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv2f64( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv2f64_commute_ta( %va, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv2f64_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv2f64( %vb, %va, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f64( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv4f64(, , , , i32) +declare @llvm.vp.fneg.nxv4f64(, , i32) +declare @llvm.vp.merge.nxv4f64(, , , i32) +declare @llvm.vp.select.nxv4f64(, , , i32) + +define @vfmacc_vv_nxv4f64( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfmacc.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv4f64( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vv_nxv4f64_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv4f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfmacc.vv v16, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv4f64( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f64( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv4f64( %va, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv4f64( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv4f64_commute( %va, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv4f64_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv4f64( %vb, %va, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv4f64_unmasked( %va, double %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv4f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfmacc.vf v12, fa0, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv4f64( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f64( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vv_nxv4f64_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv4f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfmacc.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv4f64( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv4f64_ta( %va, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv4f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv4f64( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv4f64_commute_ta( %va, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv4f64_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv4f64( %vb, %va, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f64( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv8f64(, , , , i32) +declare @llvm.vp.fneg.nxv8f64(, , i32) +declare @llvm.vp.merge.nxv8f64(, , , i32) +declare @llvm.vp.select.nxv8f64(, , , i32) + +define @vfmacc_vv_nxv8f64( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, mu +; CHECK-NEXT: vfmacc.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv8f64( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vv_nxv8f64_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv8f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, mu +; CHECK-NEXT: vfmacc.vv v24, v8, v16 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv8f64( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f64( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv8f64( %va, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vfmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv8f64( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv8f64_commute( %va, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv8f64_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vfmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv8f64( %vb, %va, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv8f64_unmasked( %va, double %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv8f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vfmacc.vf v16, fa0, v8 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv8f64( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f64( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vv_nxv8f64_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vv_nxv8f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; CHECK-NEXT: vfmacc.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv8f64( %a, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv8f64_ta( %va, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv8f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv8f64( %va, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfmacc_vf_nxv8f64_commute_ta( %va, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfmacc_vf_nxv8f64_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv8f64( %vb, %va, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f64( %m, %v, %c, i32 %evl) + ret %u +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsac-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsac-vp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfmsac-vp.ll @@ -0,0 +1,2039 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s + +declare @llvm.vp.fma.nxv1f16(, , , , i32) +declare @llvm.vp.fneg.nxv1f16(, , i32) +declare @llvm.vp.merge.nxv1f16(, , , i32) +declare @llvm.vp.select.nxv1f16(, , , i32) + +define @vmfsac_vv_nxv1f16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv1f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vv_nxv1f16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv1f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfmsac.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv1f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv1f16( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv1f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv1f16_commute( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv1f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv1f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %vb, %a, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv1f16_unmasked( %a, half %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv1f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv1f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vv_nxv1f16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv1f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv1f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv1f16_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv1f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv1f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv1f16_commute_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv1f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv1f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %vb, %a, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f16( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv2f16(, , , , i32) +declare @llvm.vp.fneg.nxv2f16(, , i32) +declare @llvm.vp.merge.nxv2f16(, , , i32) +declare @llvm.vp.select.nxv2f16(, , , i32) + +define @vmfsac_vv_nxv2f16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv2f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vv_nxv2f16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv2f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfmsac.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv2f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv2f16( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv2f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv2f16_commute( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv2f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv2f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %vb, %a, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv2f16_unmasked( %a, half %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv2f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv2f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vv_nxv2f16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv2f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv2f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv2f16_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv2f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv2f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv2f16_commute_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv2f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv2f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %vb, %a, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f16( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv4f16(, , , , i32) +declare @llvm.vp.fneg.nxv4f16(, , i32) +declare @llvm.vp.merge.nxv4f16(, , , i32) +declare @llvm.vp.select.nxv4f16(, , , i32) + +define @vmfsac_vv_nxv4f16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv4f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vv_nxv4f16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv4f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfmsac.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv4f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv4f16( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv4f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv4f16_commute( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv4f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv4f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %vb, %a, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv4f16_unmasked( %a, half %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv4f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv4f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vv_nxv4f16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv4f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv4f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv4f16_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv4f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv4f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv4f16_commute_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv4f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv4f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %vb, %a, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f16( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv8f16(, , , , i32) +declare @llvm.vp.fneg.nxv8f16(, , i32) +declare @llvm.vp.merge.nxv8f16(, , , i32) +declare @llvm.vp.select.nxv8f16(, , , i32) + +define @vmfsac_vv_nxv8f16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfmsac.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv8f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vv_nxv8f16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv8f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfmsac.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv8f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv8f16( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv8f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv8f16_commute( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv8f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv8f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %vb, %a, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv8f16_unmasked( %a, half %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv8f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfmsac.vf v10, fa0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv8f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vv_nxv8f16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv8f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfmsac.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv8f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv8f16_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv8f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv8f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv8f16_commute_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv8f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv8f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %vb, %a, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f16( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv16f16(, , , , i32) +declare @llvm.vp.fneg.nxv16f16(, , i32) +declare @llvm.vp.merge.nxv16f16(, , , i32) +declare @llvm.vp.select.nxv16f16(, , , i32) + +define @vmfsac_vv_nxv16f16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfmsac.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv16f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vv_nxv16f16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv16f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfmsac.vv v16, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv16f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv16f16( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv16f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv16f16_commute( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv16f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv16f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %vb, %a, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv16f16_unmasked( %a, half %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv16f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfmsac.vf v12, fa0, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv16f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vv_nxv16f16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv16f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfmsac.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv16f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv16f16_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv16f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv16f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv16f16_commute_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv16f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv16f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %vb, %a, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16f16( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv32f16(, , , , i32) +declare @llvm.vp.fneg.nxv32f16(, , i32) +declare @llvm.vp.merge.nxv32f16(, , , i32) +declare @llvm.vp.select.nxv32f16(, , , i32) + +define @vmfsac_vv_nxv32f16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re16.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, mu +; CHECK-NEXT: vfmsac.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv32f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vv_nxv32f16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv32f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re16.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, mu +; CHECK-NEXT: vfmsac.vv v24, v8, v16 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv32f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv32f16( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu +; CHECK-NEXT: vfmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv32f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv32f16_commute( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv32f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu +; CHECK-NEXT: vfmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv32f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %vb, %a, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv32f16_unmasked( %a, half %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv32f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu +; CHECK-NEXT: vfmsac.vf v16, fa0, v8 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv32f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vv_nxv32f16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv32f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re16.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu +; CHECK-NEXT: vfmsac.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv32f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv32f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv32f16_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv32f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv32f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv32f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv32f16_commute_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv32f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv32f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %vb, %a, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv32f16( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv1f32(, , , , i32) +declare @llvm.vp.fneg.nxv1f32(, , i32) +declare @llvm.vp.merge.nxv1f32(, , , i32) +declare @llvm.vp.select.nxv1f32(, , , i32) + +define @vmfsac_vv_nxv1f32( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv1f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vv_nxv1f32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv1f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfmsac.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv1f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv1f32( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv1f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv1f32_commute( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv1f32_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv1f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %vb, %a, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv1f32_unmasked( %a, float %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv1f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv1f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vv_nxv1f32_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv1f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv1f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv1f32_ta( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv1f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv1f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv1f32_commute_ta( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv1f32_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv1f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %vb, %a, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f32( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv2f32(, , , , i32) +declare @llvm.vp.fneg.nxv2f32(, , i32) +declare @llvm.vp.merge.nxv2f32(, , , i32) +declare @llvm.vp.select.nxv2f32(, , , i32) + +define @vmfsac_vv_nxv2f32( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv2f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vv_nxv2f32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv2f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfmsac.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv2f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv2f32( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv2f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv2f32_commute( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv2f32_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv2f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %vb, %a, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv2f32_unmasked( %a, float %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv2f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv2f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vv_nxv2f32_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv2f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv2f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv2f32_ta( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv2f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv2f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv2f32_commute_ta( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv2f32_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv2f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %vb, %a, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f32( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv4f32(, , , , i32) +declare @llvm.vp.fneg.nxv4f32(, , i32) +declare @llvm.vp.merge.nxv4f32(, , , i32) +declare @llvm.vp.select.nxv4f32(, , , i32) + +define @vmfsac_vv_nxv4f32( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfmsac.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv4f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vv_nxv4f32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv4f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfmsac.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv4f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv4f32( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv4f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv4f32_commute( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv4f32_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv4f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %vb, %a, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv4f32_unmasked( %a, float %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv4f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfmsac.vf v10, fa0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv4f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vv_nxv4f32_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv4f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfmsac.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv4f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv4f32_ta( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv4f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv4f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv4f32_commute_ta( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv4f32_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv4f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %vb, %a, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f32( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv8f32(, , , , i32) +declare @llvm.vp.fneg.nxv8f32(, , i32) +declare @llvm.vp.merge.nxv8f32(, , , i32) +declare @llvm.vp.select.nxv8f32(, , , i32) + +define @vmfsac_vv_nxv8f32( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfmsac.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv8f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vv_nxv8f32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv8f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfmsac.vv v16, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv8f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv8f32( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv8f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv8f32_commute( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv8f32_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv8f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %vb, %a, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv8f32_unmasked( %a, float %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv8f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfmsac.vf v12, fa0, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv8f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vv_nxv8f32_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv8f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfmsac.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv8f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv8f32_ta( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv8f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv8f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv8f32_commute_ta( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv8f32_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv8f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %vb, %a, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f32( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv16f32(, , , , i32) +declare @llvm.vp.fneg.nxv16f32(, , i32) +declare @llvm.vp.merge.nxv16f32(, , , i32) +declare @llvm.vp.select.nxv16f32(, , , i32) + +define @vmfsac_vv_nxv16f32( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re32.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, mu +; CHECK-NEXT: vfmsac.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv16f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vv_nxv16f32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv16f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re32.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, mu +; CHECK-NEXT: vfmsac.vv v24, v8, v16 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv16f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv16f32( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu +; CHECK-NEXT: vfmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv16f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv16f32_commute( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv16f32_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu +; CHECK-NEXT: vfmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv16f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %vb, %a, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv16f32_unmasked( %a, float %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv16f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu +; CHECK-NEXT: vfmsac.vf v16, fa0, v8 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv16f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vv_nxv16f32_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv16f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re32.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT: vfmsac.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv16f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv16f32_ta( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv16f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv16f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv16f32_commute_ta( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv16f32_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv16f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %vb, %a, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16f32( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv1f64(, , , , i32) +declare @llvm.vp.fneg.nxv1f64(, , i32) +declare @llvm.vp.merge.nxv1f64(, , , i32) +declare @llvm.vp.select.nxv1f64(, , , i32) + +define @vmfsac_vv_nxv1f64( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv1f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vv_nxv1f64_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv1f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfmsac.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv1f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f64( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv1f64( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv1f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv1f64_commute( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv1f64_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv1f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %vb, %a, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv1f64_unmasked( %a, double %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv1f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv1f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f64( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vv_nxv1f64_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv1f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv1f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv1f64_ta( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv1f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv1f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv1f64_commute_ta( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv1f64_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv1f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %vb, %a, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f64( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv2f64(, , , , i32) +declare @llvm.vp.fneg.nxv2f64(, , i32) +declare @llvm.vp.merge.nxv2f64(, , , i32) +declare @llvm.vp.select.nxv2f64(, , , i32) + +define @vmfsac_vv_nxv2f64( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfmsac.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv2f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vv_nxv2f64_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv2f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfmsac.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv2f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f64( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv2f64( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv2f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv2f64_commute( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv2f64_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv2f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %vb, %a, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv2f64_unmasked( %a, double %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv2f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfmsac.vf v10, fa0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv2f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f64( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vv_nxv2f64_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv2f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfmsac.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv2f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv2f64_ta( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv2f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv2f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv2f64_commute_ta( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv2f64_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv2f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %vb, %a, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f64( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv4f64(, , , , i32) +declare @llvm.vp.fneg.nxv4f64(, , i32) +declare @llvm.vp.merge.nxv4f64(, , , i32) +declare @llvm.vp.select.nxv4f64(, , , i32) + +define @vmfsac_vv_nxv4f64( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfmsac.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv4f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vv_nxv4f64_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv4f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfmsac.vv v16, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv4f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f64( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv4f64( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv4f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv4f64_commute( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv4f64_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv4f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %vb, %a, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv4f64_unmasked( %a, double %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv4f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfmsac.vf v12, fa0, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv4f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f64( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vv_nxv4f64_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv4f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfmsac.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv4f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv4f64_ta( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv4f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv4f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv4f64_commute_ta( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv4f64_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv4f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %vb, %a, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f64( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv8f64(, , , , i32) +declare @llvm.vp.fneg.nxv8f64(, , i32) +declare @llvm.vp.merge.nxv8f64(, , , i32) +declare @llvm.vp.select.nxv8f64(, , , i32) + +define @vmfsac_vv_nxv8f64( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, mu +; CHECK-NEXT: vfmsac.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv8f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vv_nxv8f64_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv8f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, mu +; CHECK-NEXT: vfmsac.vv v24, v8, v16 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv8f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f64( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv8f64( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vfmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv8f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv8f64_commute( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv8f64_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vfmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv8f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %vb, %a, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv8f64_unmasked( %a, double %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv8f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vfmsac.vf v16, fa0, v8 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv8f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f64( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vv_nxv8f64_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vv_nxv8f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; CHECK-NEXT: vfmsac.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv8f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %a, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv8f64_ta( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv8f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv8f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %a, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vmfsac_vf_nxv8f64_commute_ta( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmfsac_vf_nxv8f64_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %negc = call @llvm.vp.fneg.nxv8f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %vb, %a, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f64( %m, %v, %c, i32 %evl) + ret %u +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmacc-vp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmacc-vp.ll @@ -0,0 +1,2159 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s + +declare @llvm.vp.fma.nxv1f16(, , , , i32) +declare @llvm.vp.fneg.nxv1f16(, , i32) +declare @llvm.vp.merge.nxv1f16(, , , i32) +declare @llvm.vp.select.nxv1f16(, , , i32) + +define @vfnmacc_vv_nxv1f16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vv_nxv1f16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv1f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfnmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv1f16( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv1f16_commute( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv1f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %vb, %nega, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv1f16_unmasked( %a, half %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv1f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vv_nxv1f16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv1f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv1f16_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv1f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv1f16_commute_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv1f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %vb, %nega, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f16( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv2f16(, , , , i32) +declare @llvm.vp.fneg.nxv2f16(, , i32) +declare @llvm.vp.merge.nxv2f16(, , , i32) +declare @llvm.vp.select.nxv2f16(, , , i32) + +define @vfnmacc_vv_nxv2f16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vv_nxv2f16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv2f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfnmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv2f16( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv2f16_commute( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv2f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %vb, %nega, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv2f16_unmasked( %a, half %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv2f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vv_nxv2f16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv2f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv2f16_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv2f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv2f16_commute_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv2f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %vb, %nega, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f16( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv4f16(, , , , i32) +declare @llvm.vp.fneg.nxv4f16(, , i32) +declare @llvm.vp.merge.nxv4f16(, , , i32) +declare @llvm.vp.select.nxv4f16(, , , i32) + +define @vfnmacc_vv_nxv4f16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vv_nxv4f16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv4f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfnmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv4f16( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv4f16_commute( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv4f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %vb, %nega, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv4f16_unmasked( %a, half %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv4f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vv_nxv4f16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv4f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv4f16_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv4f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv4f16_commute_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv4f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %vb, %nega, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f16( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv8f16(, , , , i32) +declare @llvm.vp.fneg.nxv8f16(, , i32) +declare @llvm.vp.merge.nxv8f16(, , , i32) +declare @llvm.vp.select.nxv8f16(, , , i32) + +define @vfnmacc_vv_nxv8f16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfnmacc.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vv_nxv8f16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv8f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfnmacc.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv8f16( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfnmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv8f16_commute( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv8f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfnmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %vb, %nega, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv8f16_unmasked( %a, half %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv8f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfnmacc.vf v10, fa0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vv_nxv8f16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv8f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfnmacc.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv8f16_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv8f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfnmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv8f16_commute_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv8f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfnmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %vb, %nega, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f16( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv16f16(, , , , i32) +declare @llvm.vp.fneg.nxv16f16(, , i32) +declare @llvm.vp.merge.nxv16f16(, , , i32) +declare @llvm.vp.select.nxv16f16(, , , i32) + +define @vfnmacc_vv_nxv16f16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfnmacc.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vv_nxv16f16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv16f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfnmacc.vv v16, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv16f16( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfnmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv16f16_commute( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv16f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfnmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %vb, %nega, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv16f16_unmasked( %a, half %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv16f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfnmacc.vf v12, fa0, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vv_nxv16f16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv16f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfnmacc.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv16f16_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv16f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfnmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv16f16_commute_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv16f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfnmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %vb, %nega, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16f16( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv32f16(, , , , i32) +declare @llvm.vp.fneg.nxv32f16(, , i32) +declare @llvm.vp.merge.nxv32f16(, , , i32) +declare @llvm.vp.select.nxv32f16(, , , i32) + +define @vfnmacc_vv_nxv32f16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re16.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, mu +; CHECK-NEXT: vfnmacc.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv32f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv32f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vv_nxv32f16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv32f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re16.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, mu +; CHECK-NEXT: vfnmacc.vv v24, v8, v16 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv32f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv32f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv32f16( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu +; CHECK-NEXT: vfnmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv32f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv32f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv32f16_commute( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv32f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu +; CHECK-NEXT: vfnmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv32f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv32f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %vb, %nega, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv32f16_unmasked( %a, half %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv32f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu +; CHECK-NEXT: vfnmacc.vf v16, fa0, v8 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv32f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv32f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vv_nxv32f16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv32f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re16.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu +; CHECK-NEXT: vfnmacc.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv32f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv32f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv32f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv32f16_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv32f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfnmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv32f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv32f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv32f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv32f16_commute_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv32f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfnmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv32f16( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv32f16( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %vb, %nega, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv32f16( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv1f32(, , , , i32) +declare @llvm.vp.fneg.nxv1f32(, , i32) +declare @llvm.vp.merge.nxv1f32(, , , i32) +declare @llvm.vp.select.nxv1f32(, , , i32) + +define @vfnmacc_vv_nxv1f32( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f32( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vv_nxv1f32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv1f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfnmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f32( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv1f32( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f32( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv1f32_commute( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv1f32_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f32( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %vb, %nega, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv1f32_unmasked( %a, float %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv1f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f32( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vv_nxv1f32_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv1f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f32( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv1f32_ta( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv1f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f32( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv1f32_commute_ta( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv1f32_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f32( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %vb, %nega, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f32( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv2f32(, , , , i32) +declare @llvm.vp.fneg.nxv2f32(, , i32) +declare @llvm.vp.merge.nxv2f32(, , , i32) +declare @llvm.vp.select.nxv2f32(, , , i32) + +define @vfnmacc_vv_nxv2f32( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f32( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vv_nxv2f32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv2f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfnmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f32( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv2f32( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f32( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv2f32_commute( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv2f32_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f32( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %vb, %nega, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv2f32_unmasked( %a, float %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv2f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f32( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vv_nxv2f32_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv2f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f32( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv2f32_ta( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv2f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f32( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv2f32_commute_ta( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv2f32_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f32( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %vb, %nega, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f32( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv4f32(, , , , i32) +declare @llvm.vp.fneg.nxv4f32(, , i32) +declare @llvm.vp.merge.nxv4f32(, , , i32) +declare @llvm.vp.select.nxv4f32(, , , i32) + +define @vfnmacc_vv_nxv4f32( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfnmacc.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f32( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vv_nxv4f32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv4f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfnmacc.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f32( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv4f32( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfnmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f32( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv4f32_commute( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv4f32_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfnmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f32( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %vb, %nega, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv4f32_unmasked( %a, float %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv4f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfnmacc.vf v10, fa0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f32( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vv_nxv4f32_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv4f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfnmacc.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f32( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv4f32_ta( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv4f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfnmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f32( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv4f32_commute_ta( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv4f32_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfnmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f32( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %vb, %nega, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f32( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv8f32(, , , , i32) +declare @llvm.vp.fneg.nxv8f32(, , i32) +declare @llvm.vp.merge.nxv8f32(, , , i32) +declare @llvm.vp.select.nxv8f32(, , , i32) + +define @vfnmacc_vv_nxv8f32( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfnmacc.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f32( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vv_nxv8f32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv8f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfnmacc.vv v16, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f32( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv8f32( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfnmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f32( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv8f32_commute( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv8f32_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfnmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f32( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %vb, %nega, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv8f32_unmasked( %a, float %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv8f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfnmacc.vf v12, fa0, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f32( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vv_nxv8f32_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv8f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfnmacc.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f32( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv8f32_ta( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv8f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfnmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f32( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv8f32_commute_ta( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv8f32_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfnmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f32( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %vb, %nega, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f32( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv16f32(, , , , i32) +declare @llvm.vp.fneg.nxv16f32(, , i32) +declare @llvm.vp.merge.nxv16f32(, , , i32) +declare @llvm.vp.select.nxv16f32(, , , i32) + +define @vfnmacc_vv_nxv16f32( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re32.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, mu +; CHECK-NEXT: vfnmacc.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f32( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vv_nxv16f32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv16f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re32.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, mu +; CHECK-NEXT: vfnmacc.vv v24, v8, v16 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f32( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv16f32( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu +; CHECK-NEXT: vfnmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f32( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv16f32_commute( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv16f32_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu +; CHECK-NEXT: vfnmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f32( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %vb, %nega, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv16f32_unmasked( %a, float %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv16f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu +; CHECK-NEXT: vfnmacc.vf v16, fa0, v8 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f32( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vv_nxv16f32_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv16f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re32.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT: vfnmacc.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f32( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv16f32_ta( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv16f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfnmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f32( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv16f32_commute_ta( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv16f32_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfnmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f32( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f32( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %vb, %nega, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16f32( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv1f64(, , , , i32) +declare @llvm.vp.fneg.nxv1f64(, , i32) +declare @llvm.vp.merge.nxv1f64(, , , i32) +declare @llvm.vp.select.nxv1f64(, , , i32) + +define @vfnmacc_vv_nxv1f64( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f64( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vv_nxv1f64_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv1f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfnmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f64( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f64( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv1f64( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f64( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv1f64_commute( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv1f64_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f64( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %vb, %nega, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv1f64_unmasked( %a, double %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv1f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f64( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f64( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vv_nxv1f64_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv1f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f64( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv1f64_ta( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv1f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f64( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv1f64_commute_ta( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv1f64_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f64( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %vb, %nega, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f64( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv2f64(, , , , i32) +declare @llvm.vp.fneg.nxv2f64(, , i32) +declare @llvm.vp.merge.nxv2f64(, , , i32) +declare @llvm.vp.select.nxv2f64(, , , i32) + +define @vfnmacc_vv_nxv2f64( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfnmacc.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f64( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vv_nxv2f64_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv2f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfnmacc.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f64( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f64( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv2f64( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfnmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f64( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv2f64_commute( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv2f64_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfnmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f64( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %vb, %nega, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv2f64_unmasked( %a, double %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv2f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfnmacc.vf v10, fa0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f64( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f64( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vv_nxv2f64_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv2f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfnmacc.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f64( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv2f64_ta( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv2f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfnmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f64( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv2f64_commute_ta( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv2f64_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfnmacc.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f64( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %vb, %nega, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f64( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv4f64(, , , , i32) +declare @llvm.vp.fneg.nxv4f64(, , i32) +declare @llvm.vp.merge.nxv4f64(, , , i32) +declare @llvm.vp.select.nxv4f64(, , , i32) + +define @vfnmacc_vv_nxv4f64( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfnmacc.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f64( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vv_nxv4f64_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv4f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfnmacc.vv v16, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f64( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f64( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv4f64( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfnmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f64( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv4f64_commute( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv4f64_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfnmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f64( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %vb, %nega, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv4f64_unmasked( %a, double %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv4f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfnmacc.vf v12, fa0, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f64( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f64( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vv_nxv4f64_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv4f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfnmacc.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f64( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv4f64_ta( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv4f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfnmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f64( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv4f64_commute_ta( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv4f64_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfnmacc.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f64( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %vb, %nega, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f64( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv8f64(, , , , i32) +declare @llvm.vp.fneg.nxv8f64(, , i32) +declare @llvm.vp.merge.nxv8f64(, , , i32) +declare @llvm.vp.select.nxv8f64(, , , i32) + +define @vfnmacc_vv_nxv8f64( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, mu +; CHECK-NEXT: vfnmacc.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f64( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vv_nxv8f64_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv8f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, mu +; CHECK-NEXT: vfnmacc.vv v24, v8, v16 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f64( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f64( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv8f64( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vfnmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f64( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv8f64_commute( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv8f64_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vfnmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f64( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %vb, %nega, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv8f64_unmasked( %a, double %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv8f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vfnmacc.vf v16, fa0, v8 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f64( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f64( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vv_nxv8f64_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vv_nxv8f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; CHECK-NEXT: vfnmacc.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f64( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %nega, %b, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv8f64_ta( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv8f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfnmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f64( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %nega, %vb, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmacc_vf_nxv8f64_commute_ta( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmacc_vf_nxv8f64_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfnmacc.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f64( %a, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f64( %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %vb, %nega, %negc, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f64( %m, %v, %c, i32 %evl) + ret %u +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsac-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsac-vp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsac-vp.ll @@ -0,0 +1,2039 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s + +declare @llvm.vp.fma.nxv1f16(, , , , i32) +declare @llvm.vp.fneg.nxv1f16(, , i32) +declare @llvm.vp.merge.nxv1f16(, , , i32) +declare @llvm.vp.select.nxv1f16(, , , i32) + +define @vfnmsac_vv_nxv1f16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv1f16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv1f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfnmsac.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv1f16( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv1f16_commute( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv1f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv1f16_unmasked( %a, half %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv1f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv1f16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv1f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv1f16_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv1f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv1f16_commute_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv1f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f16( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv2f16(, , , , i32) +declare @llvm.vp.fneg.nxv2f16(, , i32) +declare @llvm.vp.merge.nxv2f16(, , , i32) +declare @llvm.vp.select.nxv2f16(, , , i32) + +define @vfnmsac_vv_nxv2f16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv2f16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv2f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfnmsac.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv2f16( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv2f16_commute( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv2f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv2f16_unmasked( %a, half %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv2f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv2f16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv2f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv2f16_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv2f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv2f16_commute_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv2f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f16( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv4f16(, , , , i32) +declare @llvm.vp.fneg.nxv4f16(, , i32) +declare @llvm.vp.merge.nxv4f16(, , , i32) +declare @llvm.vp.select.nxv4f16(, , , i32) + +define @vfnmsac_vv_nxv4f16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv4f16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv4f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfnmsac.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv4f16( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv4f16_commute( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv4f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv4f16_unmasked( %a, half %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv4f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv4f16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv4f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv4f16_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv4f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv4f16_commute_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv4f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f16( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv8f16(, , , , i32) +declare @llvm.vp.fneg.nxv8f16(, , i32) +declare @llvm.vp.merge.nxv8f16(, , , i32) +declare @llvm.vp.select.nxv8f16(, , , i32) + +define @vfnmsac_vv_nxv8f16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfnmsac.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv8f16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv8f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfnmsac.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv8f16( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfnmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv8f16_commute( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv8f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfnmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv8f16_unmasked( %a, half %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv8f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfnmsac.vf v10, fa0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv8f16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv8f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfnmsac.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv8f16_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv8f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfnmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv8f16_commute_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv8f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfnmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f16( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv16f16(, , , , i32) +declare @llvm.vp.fneg.nxv16f16(, , i32) +declare @llvm.vp.merge.nxv16f16(, , , i32) +declare @llvm.vp.select.nxv16f16(, , , i32) + +define @vfnmsac_vv_nxv16f16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfnmsac.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv16f16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv16f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfnmsac.vv v16, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv16f16( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfnmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv16f16_commute( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv16f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfnmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv16f16_unmasked( %a, half %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv16f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfnmsac.vf v12, fa0, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv16f16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv16f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfnmsac.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv16f16_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv16f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfnmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv16f16_commute_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv16f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfnmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16f16( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv32f16(, , , , i32) +declare @llvm.vp.fneg.nxv32f16(, , i32) +declare @llvm.vp.merge.nxv32f16(, , , i32) +declare @llvm.vp.select.nxv32f16(, , , i32) + +define @vfnmsac_vv_nxv32f16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re16.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, mu +; CHECK-NEXT: vfnmsac.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv32f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv32f16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv32f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re16.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, mu +; CHECK-NEXT: vfnmsac.vv v24, v8, v16 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv32f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv32f16( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv32f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv32f16_commute( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv32f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv32f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv32f16_unmasked( %a, half %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv32f16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv32f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32f16( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv32f16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv32f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re16.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu +; CHECK-NEXT: vfnmsac.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv32f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv32f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv32f16_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv32f16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv32f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv32f16( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv32f16_commute_ta( %a, half %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv32f16_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv32f16( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv32f16( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv1f32(, , , , i32) +declare @llvm.vp.fneg.nxv1f32(, , i32) +declare @llvm.vp.merge.nxv1f32(, , , i32) +declare @llvm.vp.select.nxv1f32(, , , i32) + +define @vfnmsac_vv_nxv1f32( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv1f32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv1f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfnmsac.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv1f32( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv1f32_commute( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv1f32_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv1f32_unmasked( %a, float %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv1f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv1f32_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv1f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv1f32_ta( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv1f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv1f32_commute_ta( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv1f32_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f32( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv2f32(, , , , i32) +declare @llvm.vp.fneg.nxv2f32(, , i32) +declare @llvm.vp.merge.nxv2f32(, , , i32) +declare @llvm.vp.select.nxv2f32(, , , i32) + +define @vfnmsac_vv_nxv2f32( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv2f32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv2f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfnmsac.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv2f32( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv2f32_commute( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv2f32_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv2f32_unmasked( %a, float %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv2f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv2f32_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv2f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv2f32_ta( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv2f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv2f32_commute_ta( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv2f32_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f32( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv4f32(, , , , i32) +declare @llvm.vp.fneg.nxv4f32(, , i32) +declare @llvm.vp.merge.nxv4f32(, , , i32) +declare @llvm.vp.select.nxv4f32(, , , i32) + +define @vfnmsac_vv_nxv4f32( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfnmsac.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv4f32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv4f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfnmsac.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv4f32( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfnmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv4f32_commute( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv4f32_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfnmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv4f32_unmasked( %a, float %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv4f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfnmsac.vf v10, fa0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv4f32_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv4f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfnmsac.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv4f32_ta( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv4f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfnmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv4f32_commute_ta( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv4f32_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfnmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f32( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv8f32(, , , , i32) +declare @llvm.vp.fneg.nxv8f32(, , i32) +declare @llvm.vp.merge.nxv8f32(, , , i32) +declare @llvm.vp.select.nxv8f32(, , , i32) + +define @vfnmsac_vv_nxv8f32( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfnmsac.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv8f32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv8f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfnmsac.vv v16, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv8f32( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfnmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv8f32_commute( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv8f32_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfnmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv8f32_unmasked( %a, float %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv8f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfnmsac.vf v12, fa0, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv8f32_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv8f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfnmsac.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv8f32_ta( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv8f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfnmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv8f32_commute_ta( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv8f32_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfnmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f32( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv16f32(, , , , i32) +declare @llvm.vp.fneg.nxv16f32(, , i32) +declare @llvm.vp.merge.nxv16f32(, , , i32) +declare @llvm.vp.select.nxv16f32(, , , i32) + +define @vfnmsac_vv_nxv16f32( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re32.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, mu +; CHECK-NEXT: vfnmsac.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv16f32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv16f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re32.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, mu +; CHECK-NEXT: vfnmsac.vv v24, v8, v16 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv16f32( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv16f32_commute( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv16f32_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv16f32_unmasked( %a, float %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv16f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16f32( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv16f32_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv16f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re32.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT: vfnmsac.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv16f32_ta( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv16f32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16f32( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv16f32_commute_ta( %a, float %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv16f32_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv16f32( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16f32( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv1f64(, , , , i32) +declare @llvm.vp.fneg.nxv1f64(, , i32) +declare @llvm.vp.merge.nxv1f64(, , , i32) +declare @llvm.vp.select.nxv1f64(, , , i32) + +define @vfnmsac_vv_nxv1f64( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv1f64_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv1f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfnmsac.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f64( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv1f64( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv1f64_commute( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv1f64_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv1f64_unmasked( %a, double %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv1f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1f64( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv1f64_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv1f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv1f64_ta( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv1f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv1f64_commute_ta( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv1f64_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfnmsac.vf v9, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv1f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1f64( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv2f64(, , , , i32) +declare @llvm.vp.fneg.nxv2f64(, , i32) +declare @llvm.vp.merge.nxv2f64(, , , i32) +declare @llvm.vp.select.nxv2f64(, , , i32) + +define @vfnmsac_vv_nxv2f64( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfnmsac.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv2f64_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv2f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfnmsac.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f64( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv2f64( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfnmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv2f64_commute( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv2f64_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfnmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv2f64_unmasked( %a, double %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv2f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfnmsac.vf v10, fa0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2f64( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv2f64_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv2f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfnmsac.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv2f64_ta( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv2f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfnmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv2f64_commute_ta( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv2f64_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfnmsac.vf v10, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv2f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2f64( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv4f64(, , , , i32) +declare @llvm.vp.fneg.nxv4f64(, , i32) +declare @llvm.vp.merge.nxv4f64(, , , i32) +declare @llvm.vp.select.nxv4f64(, , , i32) + +define @vfnmsac_vv_nxv4f64( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfnmsac.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv4f64_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv4f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfnmsac.vv v16, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f64( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv4f64( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfnmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv4f64_commute( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv4f64_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfnmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv4f64_unmasked( %a, double %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv4f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfnmsac.vf v12, fa0, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4f64( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv4f64_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv4f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfnmsac.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv4f64_ta( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv4f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfnmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv4f64_commute_ta( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv4f64_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfnmsac.vf v12, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv4f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4f64( %m, %v, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.fma.nxv8f64(, , , , i32) +declare @llvm.vp.fneg.nxv8f64(, , i32) +declare @llvm.vp.merge.nxv8f64(, , , i32) +declare @llvm.vp.select.nxv8f64(, , , i32) + +define @vfnmsac_vv_nxv8f64( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, mu +; CHECK-NEXT: vfnmsac.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv8f64_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv8f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, mu +; CHECK-NEXT: vfnmsac.vv v24, v8, v16 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f64( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv8f64( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv8f64_commute( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv8f64_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv8f64_unmasked( %a, double %b, %c, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv8f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8f64( %allones, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vv_nxv8f64_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vv_nxv8f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; CHECK-NEXT: vfnmsac.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %nega, %b, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv8f64_ta( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv8f64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %nega, %vb, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f64( %m, %v, %c, i32 %evl) + ret %u +} + +define @vfnmsac_vf_nxv8f64_commute_ta( %a, double %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfnmsac_vf_nxv8f64_commute_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfnmsac.vf v16, fa0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %nega = call @llvm.vp.fneg.nxv8f64( %a, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %vb, %nega, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8f64( %m, %v, %c, i32 %evl) + ret %u +}