diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td --- a/llvm/include/llvm/IR/IntrinsicsRISCV.td +++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td @@ -1536,6 +1536,7 @@ defm vfnmsub : RISCVTernaryAAXARoundingMode; defm vfwmacc : RISCVTernaryWideRoundingMode; + defm vfwmaccbf16 : RISCVTernaryWideRoundingMode; defm vfwnmacc : RISCVTernaryWideRoundingMode; defm vfwmsac : RISCVTernaryWideRoundingMode; defm vfwnmsac : RISCVTernaryWideRoundingMode; @@ -1640,6 +1641,7 @@ defm vfwcvt_rtz_xu_f_v : RISCVConversion; defm vfwcvt_rtz_x_f_v : RISCVConversion; defm vfwcvt_f_f_v : RISCVConversion; + defm vfwcvtbf16_f_f_v : RISCVConversion; defm vfncvt_f_xu_w : RISCVConversionRoundingMode; defm vfncvt_f_x_w : RISCVConversionRoundingMode; @@ -1648,6 +1650,7 @@ defm vfncvt_rtz_xu_f_w : RISCVConversion; defm vfncvt_rtz_x_f_w : RISCVConversion; defm vfncvt_f_f_w : RISCVConversionRoundingMode; + defm vfncvtbf16_f_f_w : RISCVConversionRoundingMode; defm vfncvt_rod_f_f_w : RISCVConversion; // Output: (vector) diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -477,6 +477,8 @@ AssemblerPredicate<(all_of FeatureStdExtZvfbfwma), "'Zvfbfwma' (Vector BF16 widening mul-add)">; +def HasVInstructionsBF16 : Predicate<"Subtarget->hasVInstructionsBF16()">; + def FeatureStdExtZvfh : SubtargetFeature<"zvfh", "HasStdExtZvfh", "true", "'Zvfh' (Vector Half-Precision Floating-Point)", diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -145,6 +145,9 @@ static const MVT::SimpleValueType F16VecVTs[] = { MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16}; + static const MVT::SimpleValueType BF16VecVTs[] = { + MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16, + MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16}; static const MVT::SimpleValueType F32VecVTs[] = { MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32}; static const MVT::SimpleValueType F64VecVTs[] = { @@ -187,6 +190,10 @@ for (MVT VT : F16VecVTs) addRegClassForRVV(VT); + if (Subtarget.hasVInstructionsBF16()) + for (MVT VT : BF16VecVTs) + addRegClassForRVV(VT); + if (Subtarget.hasVInstructionsF32()) for (MVT VT : F32VecVTs) addRegClassForRVV(VT); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -1951,7 +1951,6 @@ // Vector include "RISCVInstrInfoV.td" -include "RISCVInstrInfoZvfbf.td" include "RISCVInstrInfoZvk.td" // Integer diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td @@ -1803,4 +1803,5 @@ } } // Predicates = [HasVInstructionsI64, IsRV64] +include "RISCVInstrInfoZvfbf.td" include "RISCVInstrInfoVPseudos.td" diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -118,12 +118,15 @@ ["_F64", "_F"], ["_F32", "_F"], ["_F16", "_F"], + ["_BF16", "_F"], ["_VF64", "_VF"], ["_VF32", "_VF"], ["_VF16", "_VF"], + ["_VBF16", "_VF"], ["_WF64", "_WF"], ["_WF32", "_WF"], 
["_WF16", "_WF"], + ["_WBF16", "_WF"], ["_TU", ""], ["_TIED", ""], ["_MASK", ""], @@ -210,15 +213,28 @@ list MxListFW = !if(!eq(sew, 64), [], !listremove(MxList, [V_M8])); } +class BFPR_Info { + RegisterClass fprclass = !cast("FPR" # sew); + string FX = "BF" # sew; + int SEW = sew; + list MxList = MxSet.m; + list MxListFW = !if(!eq(sew, 64), [], !listremove(MxList, [V_M8])); +} + def SCALAR_F16 : FPR_Info<16>; def SCALAR_F32 : FPR_Info<32>; def SCALAR_F64 : FPR_Info<64>; +def SCALAR_BF16 : BFPR_Info<16>; + defvar FPList = [SCALAR_F16, SCALAR_F32, SCALAR_F64]; // Used for widening instructions. It excludes F64. defvar FPListW = [SCALAR_F16, SCALAR_F32]; +// Used for widening bf16 instructions. +defvar BFPListW = [SCALAR_BF16]; + class NFSet { list L = !cond(!eq(m.value, V_M8.value): [], !eq(m.value, V_M4.value): [2], @@ -273,6 +289,7 @@ string ScalarSuffix = !cond(!eq(Scal, XLenVT) : "X", !eq(Scal, f16) : "F16", + !eq(Scal, bf16) : "BF16", !eq(Scal, f32) : "F32", !eq(Scal, f64) : "F64"); } @@ -356,6 +373,25 @@ } } +defset list AllBFloatVectors = { + defset list NoGroupBFloatVectors = { + defset list FractionalGroupBFloatVectors = { + def VBF16MF4: VTypeInfo; + def VBF16MF2: VTypeInfo; + } + def VBF16M1: VTypeInfo; + } + + defset list GroupBFloatVectors = { + def VBF16M2: GroupVTypeInfo; + def VBF16M4: GroupVTypeInfo; + def VBF16M8: GroupVTypeInfo; + } +} + // This functor is used to obtain the int vector type that has the same SEW and // multiplier as the input parameter type class GetIntVTypeInfo { @@ -490,6 +526,14 @@ def : VTypeInfoToWide; def : VTypeInfoToWide; } + +defset list AllWidenableBFloatToFloatVectors = { + def : VTypeInfoToWide; + def : VTypeInfoToWide; + def : VTypeInfoToWide; + def : VTypeInfoToWide; + def : VTypeInfoToWide; +} // This class holds the record of the RISCVVPseudoTable below. // This represents the information we need in codegen for each pseudo. 
@@ -723,6 +767,7 @@ class GetVTypePredicates { list Predicates = !cond(!eq(vti.Scalar, f16) : [HasVInstructionsF16], + !eq(vti.Scalar, bf16) : [HasVInstructionsBF16], !eq(vti.Scalar, f32) : [HasVInstructionsAnyF], !eq(vti.Scalar, f64) : [HasVInstructionsF64], !eq(vti.SEW, 64) : [HasVInstructionsI64], @@ -3047,6 +3092,12 @@ m.vrclass, m, constraint>; } +multiclass VPseudoTernaryW_VF_BF_RM { + defvar constraint = "@earlyclobber $rd"; + defm "_V" # f.FX : VPseudoTernaryWithPolicyRoundingMode; +} + multiclass VPseudoVSLDVWithPolicy("WriteVFWMulAddV_" # mx); + defvar ReadVFWMulAddV_MX = !cast("ReadVFWMulAddV_" # mx); + + defm "" : VPseudoTernaryW_VV_RM, + Sched<[WriteVFWMulAddV_MX, ReadVFWMulAddV_MX, + ReadVFWMulAddV_MX, ReadVFWMulAddV_MX, ReadVMask]>; + } + + foreach f = BFPListW in { + foreach m = f.MxListFW in { + defvar mx = m.MX; + defvar WriteVFWMulAddF_MX = !cast("WriteVFWMulAddF_" # mx); + defvar ReadVFWMulAddV_MX = !cast("ReadVFWMulAddV_" # mx); + defvar ReadVFWMulAddF_MX = !cast("ReadVFWMulAddF_" # mx); + + defm "" : VPseudoTernaryW_VF_BF_RM, + Sched<[WriteVFWMulAddF_MX, ReadVFWMulAddV_MX, + ReadVFWMulAddV_MX, ReadVFWMulAddF_MX, ReadVMask]>; + } + } +} + multiclass VPseudoVCMPM_VV_VX_VI { foreach m = MxList in { defvar mx = m.MX; @@ -5635,6 +5711,19 @@ } } +multiclass VPatConversionWF_VF_BF { + foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in + { + defvar fvti = fvtiToFWti.Vti; + defvar fwti = fvtiToFWti.Wti; + let Predicates = !listconcat(GetVTypePredicates.Predicates, + GetVTypePredicates.Predicates) in + defm : VPatConversionTA; + } +} + multiclass VPatConversionVI_WF { foreach vtiToWti = AllWidenableIntToFloatVectors in { defvar vti = vtiToWti.Vti; @@ -5695,6 +5784,18 @@ } } +multiclass VPatConversionVF_WF_BF_RM { + foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in { + defvar fvti = fvtiToFWti.Vti; + defvar fwti = fvtiToFWti.Wti; + let Predicates = !listconcat(GetVTypePredicates.Predicates, + GetVTypePredicates.Predicates) in + defm : VPatConversionTARoundingMode; + } +} + multiclass VPatCompare_VI { foreach vti = AllIntegerVectors in { @@ -6140,6 +6241,8 @@ defm PseudoVFWNMACC : VPseudoVWMAC_VV_VF_RM; defm PseudoVFWMSAC : VPseudoVWMAC_VV_VF_RM; defm PseudoVFWNMSAC : VPseudoVWMAC_VV_VF_RM; +let Predicates = [HasStdExtZvfbfwma] in +defm PseudoVFWMACCBF16 : VPseudoVWMAC_VV_VF_BF_RM; } //===----------------------------------------------------------------------===// @@ -6244,6 +6347,7 @@ defm PseudoVFWCVT_F_X : VPseudoVWCVTF_V; defm PseudoVFWCVT_F_F : VPseudoVWCVTD_V; +defm PseudoVFWCVTBF16_F_F : VPseudoVWCVTD_V; } // mayRaiseFPException = true //===----------------------------------------------------------------------===// @@ -6269,6 +6373,7 @@ let hasSideEffects = 0, hasPostISelHook = 1 in defm PseudoVFNCVT_F_F : VPseudoVNCVTD_W_RM; +defm PseudoVFNCVTBF16_F_F : VPseudoVNCVTD_W_RM; defm PseudoVFNCVT_ROD_F_F : VPseudoVNCVTD_W; } // mayRaiseFPException = true @@ -6804,7 +6909,10 @@ defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwmsac", "PseudoVFWMSAC", AllWidenableFloatVectors>; defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwnmsac", "PseudoVFWNMSAC", - AllWidenableFloatVectors>; + AllWidenableFloatVectors>; +let Predicates = [HasStdExtZvfbfwma] in +defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwmaccbf16", "PseudoVFWMACCBF16", + AllWidenableBFloatToFloatVectors>; //===----------------------------------------------------------------------===// // 13.8. 
Vector Floating-Point Square-Root Instruction @@ -6909,6 +7017,8 @@ defm : VPatConversionWF_VI<"int_riscv_vfwcvt_f_xu_v", "PseudoVFWCVT_F_XU">; defm : VPatConversionWF_VI<"int_riscv_vfwcvt_f_x_v", "PseudoVFWCVT_F_X">; defm : VPatConversionWF_VF<"int_riscv_vfwcvt_f_f_v", "PseudoVFWCVT_F_F">; +defm : VPatConversionWF_VF_BF<"int_riscv_vfwcvtbf16_f_f_v", + "PseudoVFWCVTBF16_F_F">; //===----------------------------------------------------------------------===// // 13.19. Narrowing Floating-Point/Integer Type-Convert Instructions @@ -6920,6 +7030,8 @@ defm : VPatConversionVF_WI_RM <"int_riscv_vfncvt_f_xu_w", "PseudoVFNCVT_F_XU">; defm : VPatConversionVF_WI_RM <"int_riscv_vfncvt_f_x_w", "PseudoVFNCVT_F_X">; defm : VPatConversionVF_WF_RM<"int_riscv_vfncvt_f_f_w", "PseudoVFNCVT_F_F">; +defm : VPatConversionVF_WF_BF_RM<"int_riscv_vfncvtbf16_f_f_w", + "PseudoVFNCVTBF16_F_F">; defm : VPatConversionVF_WF<"int_riscv_vfncvt_rod_f_f_w", "PseudoVFNCVT_ROD_F_F">; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td @@ -883,15 +883,16 @@ // 7.4. Vector Unit-Stride Instructions foreach vti = !listconcat(FractionalGroupIntegerVectors, - FractionalGroupFloatVectors) in + FractionalGroupFloatVectors, + FractionalGroupBFloatVectors) in let Predicates = GetVTypePredicates.Predicates in defm : VPatUSLoadStoreSDNode; -foreach vti = [VI8M1, VI16M1, VI32M1, VI64M1, VF16M1, VF32M1, VF64M1] in +foreach vti = [VI8M1, VI16M1, VI32M1, VI64M1, VBF16M1, VF16M1, VF32M1, VF64M1] in let Predicates = GetVTypePredicates.Predicates in defm : VPatUSLoadStoreWholeVRSDNode; -foreach vti = !listconcat(GroupIntegerVectors, GroupFloatVectors) in +foreach vti = !listconcat(GroupIntegerVectors, GroupFloatVectors, GroupBFloatVectors) in let Predicates = GetVTypePredicates.Predicates in defm : VPatUSLoadStoreWholeVRSDNode; diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td @@ -313,6 +313,13 @@ defvar vfloat16m4_t = nxv16f16; defvar vfloat16m8_t = nxv32f16; +defvar vbfloat16mf4_t = nxv1bf16; +defvar vbfloat16mf2_t = nxv2bf16; +defvar vbfloat16m1_t = nxv4bf16; +defvar vbfloat16m2_t = nxv8bf16; +defvar vbfloat16m4_t = nxv16bf16; +defvar vbfloat16m8_t = nxv32bf16; + defvar vfloat32mf2_t = nxv1f32; defvar vfloat32m1_t = nxv2f32; defvar vfloat32m2_t = nxv4f32; @@ -489,19 +496,23 @@ vbool32_t, vbool64_t]; defvar VM1VTs = [vint8m1_t, vint16m1_t, vint32m1_t, vint64m1_t, - vfloat16m1_t, vfloat32m1_t, vfloat64m1_t, - vint8mf2_t, vint8mf4_t, vint8mf8_t, - vint16mf2_t, vint16mf4_t, vint32mf2_t, - vfloat16mf4_t, vfloat16mf2_t, vfloat32mf2_t]; + vbfloat16m1_t, vfloat16m1_t, vfloat32m1_t, + vfloat64m1_t, vint8mf2_t, vint8mf4_t, vint8mf8_t, + vint16mf2_t, vint16mf4_t, vint32mf2_t, + vfloat16mf4_t, vfloat16mf2_t, vbfloat16mf4_t, + vbfloat16mf2_t, vfloat32mf2_t]; defvar VM2VTs = [vint8m2_t, vint16m2_t, vint32m2_t, vint64m2_t, - vfloat16m2_t, vfloat32m2_t, vfloat64m2_t]; - + vfloat16m2_t, vbfloat16m2_t, + vfloat32m2_t, vfloat64m2_t]; + defvar VM4VTs = [vint8m4_t, vint16m4_t, vint32m4_t, vint64m4_t, - vfloat16m4_t, vfloat32m4_t, vfloat64m4_t]; - + vfloat16m4_t, vbfloat16m4_t, + vfloat32m4_t, vfloat64m4_t]; + defvar VM8VTs = [vint8m8_t, vint16m8_t, vint32m8_t, 
vint64m8_t, - vfloat16m8_t, vfloat32m8_t, vfloat64m8_t]; + vfloat16m8_t, vbfloat16m8_t, + vfloat32m8_t, vfloat64m8_t]; def VR : VReg @llvm.riscv.vfncvtbf16.f.f.w.nxv1bf16.nxv1f32( + , + , + iXLen, iXLen); + +define @intrinsic_vfncvtbf16_f.f.w_nxv1bf16_nxv1f32( %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfncvtbf16_f.f.w_nxv1bf16_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v9, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfncvtbf16.f.f.w.nxv1bf16.nxv1f32( + undef, + %0, + iXLen 7, iXLen %1) + + ret %a +} + +declare @llvm.riscv.vfncvtbf16.f.f.w.mask.nxv1bf16.nxv1f32( + , + , + , + iXLen, iXLen, iXLen); + +define @intrinsic_vfncvtbf16_mask_f.f.w_nxv1bf16_nxv1f32( %0, %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfncvtbf16_mask_f.f.w_nxv1bf16_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfncvtbf16.f.f.w.mask.nxv1bf16.nxv1f32( + %0, + %1, + %2, + iXLen 7, iXLen %3, iXLen 1) + + ret %a +} + +declare @llvm.riscv.vfncvtbf16.f.f.w.nxv2bf16.nxv2f32( + , + , + iXLen, iXLen); + +define @intrinsic_vfncvtbf16_f.f.w_nxv2bf16_nxv2f32( %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfncvtbf16_f.f.w_nxv2bf16_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v9, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfncvtbf16.f.f.w.nxv2bf16.nxv2f32( + undef, + %0, + iXLen 7, iXLen %1) + + ret %a +} + +declare @llvm.riscv.vfncvtbf16.f.f.w.mask.nxv2bf16.nxv2f32( + , + , + , + iXLen, iXLen, iXLen); + +define @intrinsic_vfncvtbf16_mask_f.f.w_nxv2bf16_nxv2f32( %0, %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfncvtbf16_mask_f.f.w_nxv2bf16_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfncvtbf16.f.f.w.mask.nxv2bf16.nxv2f32( + %0, + %1, + %2, + iXLen 7, iXLen %3, iXLen 1) + + ret %a +} + +declare @llvm.riscv.vfncvtbf16.f.f.w.nxv4bf16.nxv4f32( + , + , + iXLen, iXLen); + +define @intrinsic_vfncvtbf16_f.f.w_nxv4bf16_nxv4f32( %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfncvtbf16_f.f.w_nxv4bf16_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfncvtbf16.f.f.w.nxv4bf16.nxv4f32( + undef, + %0, + iXLen 7, iXLen %1) + + ret %a +} + +declare @llvm.riscv.vfncvtbf16.f.f.w.mask.nxv4bf16.nxv4f32( + , + , + , + iXLen, iXLen, iXLen); + +define @intrinsic_vfncvtbf16_mask_f.f.w_nxv4bf16_nxv4f32( %0, %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfncvtbf16_mask_f.f.w_nxv4bf16_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfncvtbf16.f.f.w.mask.nxv4bf16.nxv4f32( + %0, + %1, + %2, + iXLen 7, iXLen %3, iXLen 1) + + ret %a +} + +declare @llvm.riscv.vfncvtbf16.f.f.w.nxv8bf16.nxv8f32( + , + , + iXLen, iXLen); + +define @intrinsic_vfncvtbf16_f.f.w_nxv8bf16_nxv8f32( %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfncvtbf16_f.f.w_nxv8bf16_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma 
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v8 +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfncvtbf16.f.f.w.nxv8bf16.nxv8f32( + undef, + %0, + iXLen 7, iXLen %1) + + ret %a +} + +declare @llvm.riscv.vfncvtbf16.f.f.w.mask.nxv8bf16.nxv8f32( + , + , + , + iXLen, iXLen, iXLen); + +define @intrinsic_vfncvtbf16_mask_f.f.w_nxv8bf16_nxv8f32( %0, %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfncvtbf16_mask_f.f.w_nxv8bf16_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfncvtbf16.f.f.w.mask.nxv8bf16.nxv8f32( + %0, + %1, + %2, + iXLen 7, iXLen %3, iXLen 1) + + ret %a +} + +declare @llvm.riscv.vfncvtbf16.f.f.w.nxv16bf16.nxv16f32( + , + , + iXLen, iXLen); + +define @intrinsic_vfncvtbf16_f.f.w_nxv16bf16_nxv16f32( %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfncvtbf16_f.f.w_nxv16bf16_nxv16f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v16, v8 +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfncvtbf16.f.f.w.nxv16bf16.nxv16f32( + undef, + %0, + iXLen 7, iXLen %1) + + ret %a +} + +declare @llvm.riscv.vfncvtbf16.f.f.w.mask.nxv16bf16.nxv16f32( + , + , + , + iXLen, iXLen, iXLen); + +define @intrinsic_vfncvtbf16_mask_f.f.w_nxv16bf16_nxv16f32( %0, %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfncvtbf16_mask_f.f.w_nxv16bf16_nxv16f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfncvtbf16.f.f.w.mask.nxv16bf16.nxv16f32( + %0, + %1, + %2, + iXLen 7, iXLen %3, iXLen 1) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvtbf16-f-f.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvtbf16-f-f.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvtbf16-f-f.ll @@ -0,0 +1,224 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zfbfmin,+experimental-zvfbfmin \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zfbfmin,+experimental-zvfbfmin \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s +declare @llvm.riscv.vfwcvtbf16.f.f.v.nxv1f32.nxv1bf16( + , + , + iXLen); + +define @intrinsic_vfwcvtbf16_f.f.v_nxv1f32_nxv1bf16( %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfwcvtbf16_f.f.v_nxv1f32_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwcvtbf16.f.f.v.nxv1f32.nxv1bf16( + undef, + %0, + iXLen %1) + + ret %a +} + +declare @llvm.riscv.vfwcvtbf16.f.f.v.mask.nxv1f32.nxv1bf16( + , + , + , + iXLen, + iXLen); + +define @intrinsic_vfwcvtbf16_mask_f.f.v_nxv1f32_nxv1bf16( %0, %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwcvtbf16_mask_f.f.v_nxv1f32_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwcvtbf16.f.f.v.mask.nxv1f32.nxv1bf16( + %0, + %1, + %2, + iXLen %3, iXLen 1) + + ret %a +} + +declare @llvm.riscv.vfwcvtbf16.f.f.v.nxv2f32.nxv2bf16( + , + , + iXLen); + +define 
@intrinsic_vfwcvtbf16_f.f.v_nxv2f32_nxv2bf16( %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfwcvtbf16_f.f.v_nxv2f32_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwcvtbf16.f.f.v.nxv2f32.nxv2bf16( + undef, + %0, + iXLen %1) + + ret %a +} + +declare @llvm.riscv.vfwcvtbf16.f.f.v.mask.nxv2f32.nxv2bf16( + , + , + , + iXLen, + iXLen); + +define @intrinsic_vfwcvtbf16_mask_f.f.v_nxv2f32_nxv2bf16( %0, %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwcvtbf16_mask_f.f.v_nxv2f32_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwcvtbf16.f.f.v.mask.nxv2f32.nxv2bf16( + %0, + %1, + %2, + iXLen %3, iXLen 1) + + ret %a +} + +declare @llvm.riscv.vfwcvtbf16.f.f.v.nxv4f32.nxv4bf16( + , + , + iXLen); + +define @intrinsic_vfwcvtbf16_f.f.v_nxv4f32_nxv4bf16( %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfwcvtbf16_f.f.v_nxv4f32_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwcvtbf16.f.f.v.nxv4f32.nxv4bf16( + undef, + %0, + iXLen %1) + + ret %a +} + +declare @llvm.riscv.vfwcvtbf16.f.f.v.mask.nxv4f32.nxv4bf16( + , + , + , + iXLen, + iXLen); + +define @intrinsic_vfwcvtbf16_mask_f.f.v_nxv4f32_nxv4bf16( %0, %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwcvtbf16_mask_f.f.v_nxv4f32_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwcvtbf16.f.f.v.mask.nxv4f32.nxv4bf16( + %0, + %1, + %2, + iXLen %3, iXLen 1) + + ret %a +} + +declare @llvm.riscv.vfwcvtbf16.f.f.v.nxv8f32.nxv8bf16( + , + , + iXLen); + +define @intrinsic_vfwcvtbf16_f.f.v_nxv8f32_nxv8bf16( %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfwcvtbf16_f.f.v_nxv8f32_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwcvtbf16.f.f.v.nxv8f32.nxv8bf16( + undef, + %0, + iXLen %1) + + ret %a +} + +declare @llvm.riscv.vfwcvtbf16.f.f.v.mask.nxv8f32.nxv8bf16( + , + , + , + iXLen, + iXLen); + +define @intrinsic_vfwcvtbf16_mask_f.f.v_nxv8f32_nxv8bf16( %0, %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwcvtbf16_mask_f.f.v_nxv8f32_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v12, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwcvtbf16.f.f.v.mask.nxv8f32.nxv8bf16( + %0, + %1, + %2, + iXLen %3, iXLen 1) + + ret %a +} + +declare @llvm.riscv.vfwcvtbf16.f.f.v.nxv16f32.nxv16bf16( + , + , + iXLen); + +define @intrinsic_vfwcvtbf16_f.f.v_nxv16f32_nxv16bf16( %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfwcvtbf16_f.f.v_nxv16f32_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwcvtbf16.f.f.v.nxv16f32.nxv16bf16( + undef, + %0, + iXLen %1) + + ret %a +} + +declare @llvm.riscv.vfwcvtbf16.f.f.v.mask.nxv16f32.nxv16bf16( + , + , + , + iXLen, + iXLen); + 
+define @intrinsic_vfwcvtbf16_mask_f.f.v_nxv16f32_nxv16bf16( %0, %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwcvtbf16_mask_f.f.v_nxv16f32_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v16, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwcvtbf16.f.f.v.mask.nxv16f32.nxv16bf16( + %0, + %1, + %2, + iXLen %3, iXLen 1) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmaccbf16.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmaccbf16.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfwmaccbf16.ll @@ -0,0 +1,464 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zfbfmin,+experimental-zvfbfwma\ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zfbfmin,+experimental-zvfbfwma \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s +declare @llvm.riscv.vfwmaccbf16.nxv1f32.nxv1bf16( + , + , + , + iXLen, iXLen, iXLen); + +define @intrinsic_vfwmaccbf16_vv_nxv1f32_nxv1bf16_nxv1bf16( %0, %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmaccbf16_vv_nxv1f32_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, ma +; CHECK-NEXT: vfwmaccbf16.vv v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwmaccbf16.nxv1f32.nxv1bf16( + %0, + %1, + %2, + iXLen 7, iXLen %3, iXLen 0) + + ret %a +} + +declare @llvm.riscv.vfwmaccbf16.mask.nxv1f32.nxv1bf16( + , + , + , + , + iXLen, iXLen, iXLen); + +define @intrinsic_vfwmaccbf16_mask_vv_nxv1f32_nxv1bf16_nxv1bf16( %0, %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmaccbf16_mask_vv_nxv1f32_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfwmaccbf16.vv v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwmaccbf16.mask.nxv1f32.nxv1bf16( + %0, + %1, + %2, + %3, + iXLen 7, iXLen %4, iXLen 0) + + ret %a +} + +declare @llvm.riscv.vfwmaccbf16.nxv2f32.nxv2bf16( + , + , + , + iXLen, iXLen, iXLen); + +define @intrinsic_vfwmaccbf16_vv_nxv2f32_nxv2bf16_nxv2bf16( %0, %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmaccbf16_vv_nxv2f32_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, ma +; CHECK-NEXT: vfwmaccbf16.vv v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwmaccbf16.nxv2f32.nxv2bf16( + %0, + %1, + %2, + iXLen 7, iXLen %3, iXLen 0) + + ret %a +} + +declare @llvm.riscv.vfwmaccbf16.mask.nxv2f32.nxv2bf16( + , + , + , + , + iXLen, iXLen, iXLen); + +define @intrinsic_vfwmaccbf16_mask_vv_nxv2f32_nxv2bf16_nxv2bf16( %0, %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmaccbf16_mask_vv_nxv2f32_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfwmaccbf16.vv v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwmaccbf16.mask.nxv2f32.nxv2bf16( + %0, + %1, + %2, + %3, + iXLen 7, iXLen %4, iXLen 0) + + ret %a +} + +declare @llvm.riscv.vfwmaccbf16.nxv4f32.nxv4bf16( + , + , + , + iXLen, iXLen, iXLen); + +define @intrinsic_vfwmaccbf16_vv_nxv4f32_nxv4bf16_nxv4bf16( %0, %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmaccbf16_vv_nxv4f32_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma +; CHECK-NEXT: 
vfwmaccbf16.vv v8, v10, v11 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwmaccbf16.nxv4f32.nxv4bf16( + %0, + %1, + %2, + iXLen 7, iXLen %3, iXLen 0) + + ret %a +} + +declare @llvm.riscv.vfwmaccbf16.mask.nxv4f32.nxv4bf16( + , + , + , + , + iXLen, iXLen, iXLen); + +define @intrinsic_vfwmaccbf16_mask_vv_nxv4f32_nxv4bf16_nxv4bf16( %0, %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmaccbf16_mask_vv_nxv4f32_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfwmaccbf16.vv v8, v10, v11, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwmaccbf16.mask.nxv4f32.nxv4bf16( + %0, + %1, + %2, + %3, + iXLen 7, iXLen %4, iXLen 0) + + ret %a +} + +declare @llvm.riscv.vfwmaccbf16.nxv8f32.nxv8bf16( + , + , + , + iXLen, iXLen, iXLen); + +define @intrinsic_vfwmaccbf16_vv_nxv8f32_nxv8bf16_nxv8bf16( %0, %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmaccbf16_vv_nxv8f32_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, ma +; CHECK-NEXT: vfwmaccbf16.vv v8, v12, v14 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwmaccbf16.nxv8f32.nxv8bf16( + %0, + %1, + %2, + iXLen 7, iXLen %3, iXLen 0) + + ret %a +} + +declare @llvm.riscv.vfwmaccbf16.mask.nxv8f32.nxv8bf16( + , + , + , + , + iXLen, iXLen, iXLen); + +define @intrinsic_vfwmaccbf16_mask_vv_nxv8f32_nxv8bf16_nxv8bf16( %0, %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmaccbf16_mask_vv_nxv8f32_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfwmaccbf16.vv v8, v12, v14, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwmaccbf16.mask.nxv8f32.nxv8bf16( + %0, + %1, + %2, + %3, + iXLen 7, iXLen %4, iXLen 0) + + ret %a +} + +declare @llvm.riscv.vfwmaccbf16.nxv16f32.nxv16bf16( + , + , + , + iXLen, iXLen, iXLen); + +define @intrinsic_vfwmaccbf16_vv_nxv16f32_nxv16bf16_nxv16bf16( %0, %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmaccbf16_vv_nxv16f32_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, ma +; CHECK-NEXT: vfwmaccbf16.vv v8, v16, v20 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwmaccbf16.nxv16f32.nxv16bf16( + %0, + %1, + %2, + iXLen 7, iXLen %3, iXLen 0) + + ret %a +} + +declare @llvm.riscv.vfwmaccbf16.mask.nxv16f32.nxv16bf16( + , + , + , + , + iXLen, iXLen, iXLen); + +define @intrinsic_vfwmaccbf16_mask_vv_nxv16f32_nxv16bf16_nxv16bf16( %0, %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmaccbf16_mask_vv_nxv16f32_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfwmaccbf16.vv v8, v16, v20, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwmaccbf16.mask.nxv16f32.nxv16bf16( + %0, + %1, + %2, + %3, + iXLen 7, iXLen %4, iXLen 0) + + ret %a +} + +declare @llvm.riscv.vfwmaccbf16.nxv1f32.bf16( + , + bfloat, + , + iXLen, iXLen, iXLen); + +define @intrinsic_vfwmaccbf16_vf_nxv1f32_bf16_nxv1bf16( %0, bfloat %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmaccbf16_vf_nxv1f32_bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, ma +; CHECK-NEXT: vfwmaccbf16.vf v8, fa0, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwmaccbf16.nxv1f32.bf16( + %0, + bfloat %1, + %2, + iXLen 7, iXLen %3, iXLen 0) + + ret %a +} + +declare @llvm.riscv.vfwmaccbf16.mask.nxv1f32.bf16( + , + bfloat, + , + , + iXLen, iXLen, iXLen); + +define 
@intrinsic_vfwmaccbf16_mask_vf_nxv1f32_bf16_nxv1bf16( %0, bfloat %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmaccbf16_mask_vf_nxv1f32_bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfwmaccbf16.vf v8, fa0, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwmaccbf16.mask.nxv1f32.bf16( + %0, + bfloat %1, + %2, + %3, + iXLen 7, iXLen %4, iXLen 0) + + ret %a +} + +declare @llvm.riscv.vfwmaccbf16.nxv2f32.bf16( + , + bfloat, + , + iXLen, iXLen, iXLen); + +define @intrinsic_vfwmaccbf16_vf_nxv2f32_bf16_nxv2bf16( %0, bfloat %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmaccbf16_vf_nxv2f32_bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, ma +; CHECK-NEXT: vfwmaccbf16.vf v8, fa0, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwmaccbf16.nxv2f32.bf16( + %0, + bfloat %1, + %2, + iXLen 7, iXLen %3, iXLen 0) + + ret %a +} + +declare @llvm.riscv.vfwmaccbf16.mask.nxv2f32.bf16( + , + bfloat, + , + , + iXLen, iXLen, iXLen); + +define @intrinsic_vfwmaccbf16_mask_vf_nxv2f32_bf16_nxv2bf16( %0, bfloat %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmaccbf16_mask_vf_nxv2f32_bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfwmaccbf16.vf v8, fa0, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwmaccbf16.mask.nxv2f32.bf16( + %0, + bfloat %1, + %2, + %3, + iXLen 7, iXLen %4, iXLen 0) + + ret %a +} + +declare @llvm.riscv.vfwmaccbf16.nxv4f32.bf16( + , + bfloat, + , + iXLen, iXLen, iXLen); + +define @intrinsic_vfwmaccbf16_vf_nxv4f32_bf16_nxv4bf16( %0, bfloat %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmaccbf16_vf_nxv4f32_bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma +; CHECK-NEXT: vfwmaccbf16.vf v8, fa0, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwmaccbf16.nxv4f32.bf16( + %0, + bfloat %1, + %2, + iXLen 7, iXLen %3, iXLen 0) + + ret %a +} + +declare @llvm.riscv.vfwmaccbf16.mask.nxv4f32.bf16( + , + bfloat, + , + , + iXLen, iXLen, iXLen); + +define @intrinsic_vfwmaccbf16_mask_vf_nxv4f32_bf16_nxv4bf16( %0, bfloat %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmaccbf16_mask_vf_nxv4f32_bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfwmaccbf16.vf v8, fa0, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwmaccbf16.mask.nxv4f32.bf16( + %0, + bfloat %1, + %2, + %3, + iXLen 7, iXLen %4, iXLen 0) + + ret %a +} + +declare @llvm.riscv.vfwmaccbf16.nxv8f32.bf16( + , + bfloat, + , + iXLen, iXLen, iXLen); + +define @intrinsic_vfwmaccbf16_vf_nxv8f32_bf16_nxv8bf16( %0, bfloat %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmaccbf16_vf_nxv8f32_bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, ma +; CHECK-NEXT: vfwmaccbf16.vf v8, fa0, v12 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwmaccbf16.nxv8f32.bf16( + %0, + bfloat %1, + %2, + iXLen 7, iXLen %3, iXLen 0) + + ret %a +} + +declare @llvm.riscv.vfwmaccbf16.mask.nxv8f32.bf16( + , + bfloat, + , + , + iXLen, iXLen, iXLen); + +define @intrinsic_vfwmaccbf16_mask_vf_nxv8f32_bf16_nxv8bf16( %0, bfloat %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmaccbf16_mask_vf_nxv8f32_bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfwmaccbf16.vf v8, fa0, v12, v0.t +; CHECK-NEXT: ret 
+entry: + %a = call @llvm.riscv.vfwmaccbf16.mask.nxv8f32.bf16( + %0, + bfloat %1, + %2, + %3, + iXLen 7, iXLen %4, iXLen 0) + + ret %a +} + +declare @llvm.riscv.vfwmaccbf16.nxv16f32.bf16( + , + bfloat, + , + iXLen, iXLen, iXLen); + +define @intrinsic_vfwmaccbf16_vf_nxv16f32_bf16_nxv16bf16( %0, bfloat %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwmaccbf16_vf_nxv16f32_bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, ma +; CHECK-NEXT: vfwmaccbf16.vf v8, fa0, v16 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwmaccbf16.nxv16f32.bf16( + %0, + bfloat %1, + %2, + iXLen 7, iXLen %3, iXLen 0) + + ret %a +} + +declare @llvm.riscv.vfwmaccbf16.mask.nxv16f32.bf16( + , + bfloat, + , + , + iXLen, iXLen, iXLen); + +define @intrinsic_vfwmaccbf16_mask_vf_nxv16f32_bf16_nxv16bf16( %0, bfloat %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmaccbf16_mask_vf_nxv16f32_bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfwmaccbf16.vf v8, fa0, v16, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwmaccbf16.mask.nxv16f32.bf16( + %0, + bfloat %1, + %2, + %3, + iXLen 7, iXLen %4, iXLen 0) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vle.ll b/llvm/test/CodeGen/RISCV/rvv/vle.ll --- a/llvm/test/CodeGen/RISCV/rvv/vle.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vle.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh,,+experimental-zfbfmin,+experimental-zvfbfmin \ ; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,,+experimental-zfbfmin,+experimental-zvfbfmin \ ; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vle.nxv1i64( , @@ -1293,6 +1293,264 @@ ret %a } +declare @llvm.riscv.vle.nxv1bf16( + , + *, + iXLen); + +define @intrinsic_vle_v_nxv1bf16_nxv1bf16(* %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vle_v_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vle.nxv1bf16( + undef, + * %0, + iXLen %1) + + ret %a +} + +declare @llvm.riscv.vle.mask.nxv1bf16( + , + *, + , + iXLen, + iXLen); + +define @intrinsic_vle_mask_v_nxv1bf16_nxv1bf16( %0, * %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vle_mask_v_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vle.mask.nxv1bf16( + %0, + * %1, + %2, + iXLen %3, iXLen 1) + + ret %a +} + +declare @llvm.riscv.vle.nxv2bf16( + , + *, + iXLen); + +define @intrinsic_vle_v_nxv2bf16_nxv2bf16(* %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vle_v_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vle.nxv2bf16( + undef, + * %0, + iXLen %1) + + ret %a +} + +declare @llvm.riscv.vle.mask.nxv2bf16( + , + *, + , + iXLen, + iXLen); + +define @intrinsic_vle_mask_v_nxv2bf16_nxv2bf16( %0, * %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vle_mask_v_nxv2bf16_nxv2bf16: +; 
CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vle.mask.nxv2bf16( + %0, + * %1, + %2, + iXLen %3, iXLen 1) + + ret %a +} + +declare @llvm.riscv.vle.nxv4bf16( + , + *, + iXLen); + +define @intrinsic_vle_v_nxv4bf16_nxv4bf16(* %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vle_v_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vle.nxv4bf16( + undef, + * %0, + iXLen %1) + + ret %a +} + +declare @llvm.riscv.vle.mask.nxv4bf16( + , + *, + , + iXLen, + iXLen); + +define @intrinsic_vle_mask_v_nxv4bf16_nxv4bf16( %0, * %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vle_mask_v_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vle.mask.nxv4bf16( + %0, + * %1, + %2, + iXLen %3, iXLen 1) + + ret %a +} + +declare @llvm.riscv.vle.nxv8bf16( + , + *, + iXLen); + +define @intrinsic_vle_v_nxv8bf16_nxv8bf16(* %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vle_v_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vle.nxv8bf16( + undef, + * %0, + iXLen %1) + + ret %a +} + +declare @llvm.riscv.vle.mask.nxv8bf16( + , + *, + , + iXLen, + iXLen); + +define @intrinsic_vle_mask_v_nxv8bf16_nxv8bf16( %0, * %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vle_mask_v_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vle.mask.nxv8bf16( + %0, + * %1, + %2, + iXLen %3, iXLen 1) + + ret %a +} + +declare @llvm.riscv.vle.nxv16bf16( + , + *, + iXLen); + +define @intrinsic_vle_v_nxv16bf16_nxv16bf16(* %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vle_v_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vle.nxv16bf16( + undef, + * %0, + iXLen %1) + + ret %a +} + +declare @llvm.riscv.vle.mask.nxv16bf16( + , + *, + , + iXLen, + iXLen); + +define @intrinsic_vle_mask_v_nxv16bf16_nxv16bf16( %0, * %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vle_mask_v_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vle.mask.nxv16bf16( + %0, + * %1, + %2, + iXLen %3, iXLen 1) + + ret %a +} + +declare @llvm.riscv.vle.nxv32bf16( + , + *, + iXLen); + +define @intrinsic_vle_v_nxv32bf16_nxv32bf16(* %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vle_v_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vle.nxv32bf16( + undef, + * %0, + iXLen %1) + + ret %a +} + +declare @llvm.riscv.vle.mask.nxv32bf16( + , + *, + , + iXLen, + iXLen); + +define @intrinsic_vle_mask_v_nxv32bf16_nxv32bf16( %0, * %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vle_mask_v_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; 
CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vle.mask.nxv32bf16( + %0, + * %1, + %2, + iXLen %3, iXLen 1) + + ret %a +} + declare @llvm.riscv.vle.nxv1i8( , *, diff --git a/llvm/test/CodeGen/RISCV/rvv/vse.ll b/llvm/test/CodeGen/RISCV/rvv/vse.ll --- a/llvm/test/CodeGen/RISCV/rvv/vse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vse.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh,+experimental-zfbfmin,+experimental-zvfbfmin \ ; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+experimental-zfbfmin,+experimental-zvfbfmin \ ; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare void @llvm.riscv.vse.nxv1i64( , @@ -1263,6 +1263,258 @@ ret void } +declare void @llvm.riscv.vse.nxv1bf16( + , + *, + iXLen); + +define void @intrinsic_vse_v_nxv1bf16_nxv1bf16( %0, * %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vse_v_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vse.nxv1bf16( + %0, + * %1, + iXLen %2) + + ret void +} + +declare void @llvm.riscv.vse.mask.nxv1bf16( + , + *, + , + iXLen); + +define void @intrinsic_vse_mask_v_nxv1bf16_nxv1bf16( %0, * %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_mask_v_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vse.mask.nxv1bf16( + %0, + * %1, + %2, + iXLen %3) + + ret void +} + +declare void @llvm.riscv.vse.nxv2bf16( + , + *, + iXLen); + +define void @intrinsic_vse_v_nxv2bf16_nxv2bf16( %0, * %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vse_v_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vse.nxv2bf16( + %0, + * %1, + iXLen %2) + + ret void +} + +declare void @llvm.riscv.vse.mask.nxv2bf16( + , + *, + , + iXLen); + +define void @intrinsic_vse_mask_v_nxv2bf16_nxv2bf16( %0, * %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_mask_v_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vse.mask.nxv2bf16( + %0, + * %1, + %2, + iXLen %3) + + ret void +} + +declare void @llvm.riscv.vse.nxv4bf16( + , + *, + iXLen); + +define void @intrinsic_vse_v_nxv4bf16_nxv4bf16( %0, * %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vse_v_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vse.nxv4bf16( + %0, + * %1, + iXLen %2) + + ret void +} + +declare void @llvm.riscv.vse.mask.nxv4bf16( + , + *, + , + iXLen); + +define void @intrinsic_vse_mask_v_nxv4bf16_nxv4bf16( %0, * %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_mask_v_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vse.mask.nxv4bf16( 
+ %0, + * %1, + %2, + iXLen %3) + + ret void +} + +declare void @llvm.riscv.vse.nxv8bf16( + , + *, + iXLen); + +define void @intrinsic_vse_v_nxv8bf16_nxv8bf16( %0, * %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vse_v_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vse.nxv8bf16( + %0, + * %1, + iXLen %2) + + ret void +} + +declare void @llvm.riscv.vse.mask.nxv8bf16( + , + *, + , + iXLen); + +define void @intrinsic_vse_mask_v_nxv8bf16_nxv8bf16( %0, * %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_mask_v_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vse.mask.nxv8bf16( + %0, + * %1, + %2, + iXLen %3) + + ret void +} + +declare void @llvm.riscv.vse.nxv16bf16( + , + *, + iXLen); + +define void @intrinsic_vse_v_nxv16bf16_nxv16bf16( %0, * %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vse_v_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vse.nxv16bf16( + %0, + * %1, + iXLen %2) + + ret void +} + +declare void @llvm.riscv.vse.mask.nxv16bf16( + , + *, + , + iXLen); + +define void @intrinsic_vse_mask_v_nxv16bf16_nxv16bf16( %0, * %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_mask_v_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vse.mask.nxv16bf16( + %0, + * %1, + %2, + iXLen %3) + + ret void +} + +declare void @llvm.riscv.vse.nxv32bf16( + , + *, + iXLen); + +define void @intrinsic_vse_v_nxv32bf16_nxv32bf16( %0, * %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vse_v_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vse.nxv32bf16( + %0, + * %1, + iXLen %2) + + ret void +} + +declare void @llvm.riscv.vse.mask.nxv32bf16( + , + *, + , + iXLen); + +define void @intrinsic_vse_mask_v_nxv32bf16_nxv32bf16( %0, * %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vse_mask_v_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret +entry: + call void @llvm.riscv.vse.mask.nxv32bf16( + %0, + * %1, + %2, + iXLen %3) + + ret void +} + declare void @llvm.riscv.vse.nxv1i8( , *,
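As a usage illustration (not part of the patch), here is a minimal self-contained sketch that chains two of the new intrinsics at LMUL=1: a bf16 widening multiply-accumulate into an f32 accumulator, followed by a narrowing conversion back to bf16. The function name is invented; iXLen is written as i64, so it corresponds to the rv64 RUN lines above and assumes -mattr=+v,+experimental-zfbfmin,+experimental-zvfbfmin,+experimental-zvfbfwma. The scalable vector types are spelled out explicitly, and the operand order and constants (rounding mode 7 = dynamic frm, trailing 0 = tail-undisturbed policy) follow the intrinsic declarations in the tests above.

define <vscale x 4 x bfloat> @bf16_mac_and_round(<vscale x 4 x float> %acc,
                                                 <vscale x 4 x bfloat> %a,
                                                 <vscale x 4 x bfloat> %b,
                                                 i64 %vl) {
entry:
  ; vfwmaccbf16.vv: acc += widen(a) * widen(b). The 7 selects the dynamic
  ; rounding mode and the final 0 requests tail-undisturbed, which is why the
  ; tests above emit "tu" vsetvli forms.
  %mac = call <vscale x 4 x float> @llvm.riscv.vfwmaccbf16.nxv4f32.nxv4bf16(
             <vscale x 4 x float> %acc,
             <vscale x 4 x bfloat> %a,
             <vscale x 4 x bfloat> %b,
             i64 7, i64 %vl, i64 0)
  ; vfncvtbf16.f.f.w: round the f32 accumulator back down to bf16
  ; (undef passthru, dynamic rounding mode again).
  %res = call <vscale x 4 x bfloat> @llvm.riscv.vfncvtbf16.f.f.w.nxv4bf16.nxv4f32(
             <vscale x 4 x bfloat> undef,
             <vscale x 4 x float> %mac,
             i64 7, i64 %vl)
  ret <vscale x 4 x bfloat> %res
}

declare <vscale x 4 x float> @llvm.riscv.vfwmaccbf16.nxv4f32.nxv4bf16(
  <vscale x 4 x float>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, i64, i64, i64)

declare <vscale x 4 x bfloat> @llvm.riscv.vfncvtbf16.f.f.w.nxv4bf16.nxv4f32(
  <vscale x 4 x bfloat>, <vscale x 4 x float>, i64, i64)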