diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -1407,6 +1407,13 @@ Expr *SizeExpr, SourceLocation AttrLoc) const; + /// Return the unique reference to a vector type of the specified + /// element type whose total width matches the target's register size. + /// + /// \pre \p VectorType must be a built-in type. + QualType getRegisterSizedVectorType(QualType VectorType, + VectorType::VectorKind VecKind) const; + /// Return the unique reference to the matrix type of the specified element /// type and size /// diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def --- a/clang/include/clang/Basic/Builtins.def +++ b/clang/include/clang/Basic/Builtins.def @@ -38,6 +38,8 @@ // V -> Vector, followed by the number of elements and the base type. // q -> Scalable vector, followed by the number of elements and the base type. // E -> ext_vector, followed by the number of elements and the base type. +// e -> Register-sized vector, followed by the base type; the vector's +// total width matches the register size. // X -> _Complex, followed by the base type. // Y -> ptrdiff_t // P -> FILE diff --git a/clang/include/clang/Basic/BuiltinsRISCV.def b/clang/include/clang/Basic/BuiltinsRISCV.def --- a/clang/include/clang/Basic/BuiltinsRISCV.def +++ b/clang/include/clang/Basic/BuiltinsRISCV.def @@ -36,5 +36,704 @@ TARGET_BUILTIN(__builtin_riscv_crc32_d, "LiLi", "nc", "experimental-zbr") TARGET_BUILTIN(__builtin_riscv_crc32c_d, "LiLi", "nc", "experimental-zbr") +// P extension + +// add8 +TARGET_BUILTIN(__rv__add8, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_uadd8, "eUceUceUc", "", "experimental-p") +TARGET_BUILTIN(__rv__v_sadd8, "eSceSceSc", "", "experimental-p") + +// add16 +TARGET_BUILTIN(__rv__add16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_uadd16, "eUseUseUs", "", "experimental-p") +TARGET_BUILTIN(__rv__v_sadd16, "eSseSseSs", "", "experimental-p") + +// ave +TARGET_BUILTIN(__rv__ave, "SLiSLiSLi", "", "experimental-p") + +// bitrev +TARGET_BUILTIN(__rv__bitrev, "ULiULiULi", "", "experimental-p") + +// bpick +TARGET_BUILTIN(__rv__bpick, "ULiULiULiULi", "", "experimental-p") + +// clrs8 +TARGET_BUILTIN(__rv__clrs8, "ULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_clrs8, "eUceSc", "", "experimental-p") + +// clrs16 +TARGET_BUILTIN(__rv__clrs16, "ULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_clrs16, "eUseSs", "", "experimental-p") + +// clrs32 +TARGET_BUILTIN(__rv__clrs32, "ULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_clrs32, "V2UiV2Si", "", "experimental-p") + +// clo8 +TARGET_BUILTIN(__rv__clo8, "ULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_clo8, "eUceSc", "", "experimental-p") + +// clo16 +TARGET_BUILTIN(__rv__clo16, "ULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_clo16, "eUseSs", "", "experimental-p") + +// clo32 +TARGET_BUILTIN(__rv__clo32, "ULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_clo32, "V2UiV2Si", "", "experimental-p") + +// clz8 +TARGET_BUILTIN(__rv__clz8, "ULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_clz8, "eUceSc", "", "experimental-p") + +// clz16 +TARGET_BUILTIN(__rv__clz16, "ULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_clz16, "eUseSs", "", "experimental-p") + +// clz32 +TARGET_BUILTIN(__rv__clz32, "ULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_clz32, "V2UiV2Si", "", "experimental-p") + +// cmpeq8 +TARGET_BUILTIN(__rv__cmpeq8,
"ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_ucmpeq8, "eUceUceUc", "", "experimental-p") +TARGET_BUILTIN(__rv__v_scmpeq8, "eUceSceSc", "", "experimental-p") + +// cmpeq16 +TARGET_BUILTIN(__rv__cmpeq16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_ucmpeq16, "eUseUseUs", "", "experimental-p") +TARGET_BUILTIN(__rv__v_scmpeq16, "eUseSseSs", "", "experimental-p") + +// cras16 +TARGET_BUILTIN(__rv__cras16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_ucras16, "eUseUseUs", "", "experimental-p") +TARGET_BUILTIN(__rv__v_scras16, "eSseSseSs", "", "experimental-p") + +// crsa16 +TARGET_BUILTIN(__rv__crsa16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_ucrsa16, "eUseUseUs", "", "experimental-p") +TARGET_BUILTIN(__rv__v_scrsa16, "eSseSseSs", "", "experimental-p") + +// insb +TARGET_BUILTIN(__rv__insb, "ULiULiULiULi", "", "experimental-p") + +// kabs8 +TARGET_BUILTIN(__rv__kabs8, "ULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kabs8, "eSceSc", "", "experimental-p") + +// kabs16 +TARGET_BUILTIN(__rv__kabs16, "ULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kabs16, "eSseSs", "", "experimental-p") + +// kabsw +TARGET_BUILTIN(__rv__kabsw, "ULiSLi", "", "experimental-p") + +// kadd8 +TARGET_BUILTIN(__rv__kadd8, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kadd8, "eSceSceSc", "", "experimental-p") + +// kadd16 +TARGET_BUILTIN(__rv__kadd16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kadd16, "eSseSseSs", "", "experimental-p") + +// kaddh +TARGET_BUILTIN(__rv__kaddh, "LiLiLi", "", "experimental-p") + +// kaddw +TARGET_BUILTIN(__rv__kaddw, "LiLiLi", "", "experimental-p") + +// kcras16 +TARGET_BUILTIN(__rv__kcras16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kcras16, "eSseSseSs", "", "experimental-p") + +// kcrsa16 +TARGET_BUILTIN(__rv__kcrsa16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kcrsa16, "eSseSseSs", "", "experimental-p") + +// kdmbb +TARGET_BUILTIN(__rv__kdmbb, "LiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kdmbb, "LieSseSs", "", "experimental-p") +// kdmbt +TARGET_BUILTIN(__rv__kdmbt, "LiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kdmbt, "LieSseSs", "", "experimental-p") +// kdmtt +TARGET_BUILTIN(__rv__kdmtt, "LiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kdmtt, "LieSseSs", "", "experimental-p") + +// kdmabb +TARGET_BUILTIN(__rv__kdmabb, "LiLiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kdmabb, "LiLieSseSs", "", "experimental-p") +// kdmabt +TARGET_BUILTIN(__rv__kdmabt, "LiLiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kdmabt, "LiLieSseSs", "", "experimental-p") +// kdmatt +TARGET_BUILTIN(__rv__kdmatt, "LiLiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kdmatt, "LiLieSseSs", "", "experimental-p") + +// khm8 +TARGET_BUILTIN(__rv__khm8, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_khm8, "eSceSceSc", "", "experimental-p") +// khmx8 +TARGET_BUILTIN(__rv__khmx8, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_khmx8, "eSceSceSc", "", "experimental-p") + +// khm16 +TARGET_BUILTIN(__rv__khm16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_khm16, "eSseSseSs", "", "experimental-p") +// khmx16 +TARGET_BUILTIN(__rv__khmx16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_khmx16, "eSseSseSs", "", "experimental-p") + +// khmbb +TARGET_BUILTIN(__rv__khmbb, "LiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_khmbb, "LieSseSs", "", "experimental-p") +// khmbt 
+TARGET_BUILTIN(__rv__khmbt, "LiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_khmbt, "LieSseSs", "", "experimental-p") +// khmtt +TARGET_BUILTIN(__rv__khmtt, "LiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_khmtt, "LieSseSs", "", "experimental-p") + +// kmabb +TARGET_BUILTIN(__rv__kmabb, "LiLiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kmabb, "eSieSieSseSs", "", "experimental-p") +// kmabt +TARGET_BUILTIN(__rv__kmabt, "LiLiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kmabt, "eSieSieSseSs", "", "experimental-p") +// kmatt +TARGET_BUILTIN(__rv__kmatt, "LiLiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kmatt, "eSieSieSseSs", "", "experimental-p") + +// kmada +TARGET_BUILTIN(__rv__kmada, "LiLiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kmada, "eSieSieSseSs", "", "experimental-p") +// kmaxda +TARGET_BUILTIN(__rv__kmaxda, "LiLiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kmaxda, "eSieSieSseSs", "", "experimental-p") + +// kmads +TARGET_BUILTIN(__rv__kmads, "LiLiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kmads, "eSieSieSseSs", "", "experimental-p") +// kmadrs +TARGET_BUILTIN(__rv__kmadrs, "LiLiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kmadrs, "eSieSieSseSs", "", "experimental-p") +// kmaxds +TARGET_BUILTIN(__rv__kmaxds, "LiLiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kmaxds, "eSieSieSseSs", "", "experimental-p") + +// kmda +TARGET_BUILTIN(__rv__kmda, "LiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kmda, "eSieSseSs", "", "experimental-p") +// kmxda +TARGET_BUILTIN(__rv__kmxda, "LiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kmxda, "eSieSseSs", "", "experimental-p") + +// kmmac +TARGET_BUILTIN(__rv__kmmac, "LiLiLiLi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kmmac, "V2SiV2SiV2SiV2Si", "", "experimental-p") +// kmmac.u +TARGET_BUILTIN(__rv__kmmac_u, "LiLiLiLi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kmmac_u, "V2SiV2SiV2SiV2Si", "", "experimental-p") + +// kmmawb +TARGET_BUILTIN(__rv__kmmawb, "LiLiLiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kmmawb, "eSieSieSieSs", "", "experimental-p") +// kmmawb_u +TARGET_BUILTIN(__rv__kmmawb_u, "LiLiLiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kmmawb_u, "eSieSieSieSs", "", "experimental-p") + +// kmmawb2 +TARGET_BUILTIN(__rv__kmmawb2, "LiLiLiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kmmawb2, "eSieSieSieSs", "", "experimental-p") +// kmmawb2_u +TARGET_BUILTIN(__rv__kmmawb2_u, "LiLiLiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kmmawb2_u, "eSieSieSieSs", "", "experimental-p") + +// kmmawt +TARGET_BUILTIN(__rv__kmmawt, "LiLiLiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kmmawt, "eSieSieSieSs", "", "experimental-p") +// kmmawt_u +TARGET_BUILTIN(__rv__kmmawt_u, "LiLiLiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kmmawt_u, "eSieSieSieSs", "", "experimental-p") + +// kmmawt2 +TARGET_BUILTIN(__rv__kmmawt2, "LiLiLiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kmmawt2, "eSieSieSieSs", "", "experimental-p") +// kmmawt2_u +TARGET_BUILTIN(__rv__kmmawt2_u, "LiLiLiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kmmawt2_u, "eSieSieSieSs", "", "experimental-p") + +// kmmsb +TARGET_BUILTIN(__rv__kmmsb, "LiLiLiLi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kmmsb, "V2SiV2SiV2SiV2Si", "", "experimental-p") +// kmmsb.u +TARGET_BUILTIN(__rv__kmmsb_u, "LiLiLiLi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kmmsb_u, "V2SiV2SiV2SiV2Si", "", "experimental-p") + +// kmmwb2 
+TARGET_BUILTIN(__rv__kmmwb2, "LiLiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kmmwb2, "eSieSieSs", "", "experimental-p") +// kmmwb2_u +TARGET_BUILTIN(__rv__kmmwb2_u, "LiLiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kmmwb2_u, "eSieSieSs", "", "experimental-p") + +// kmmwt2 +TARGET_BUILTIN(__rv__kmmwt2, "LiLiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kmmwt2, "eSieSieSs", "", "experimental-p") +// kmmwt2_u +TARGET_BUILTIN(__rv__kmmwt2_u, "LiLiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kmmwt2_u, "eSieSieSs", "", "experimental-p") + +// kmsda +TARGET_BUILTIN(__rv__kmsda, "LiLiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kmsda, "eSieSieSseSs", "", "experimental-p") +// kmsxda +TARGET_BUILTIN(__rv__kmsxda, "LiLiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kmsxda, "eSieSieSseSs", "", "experimental-p") + +// ksllw +TARGET_BUILTIN(__rv__ksllw, "LiLiULi", "", "experimental-p") + +// ksll8 +TARGET_BUILTIN(__rv__ksll8, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_ksll8, "eSceScULi", "", "experimental-p") + +// ksll16 +TARGET_BUILTIN(__rv__ksll16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_ksll16, "eSseSsULi", "", "experimental-p") + +// kslra8 +TARGET_BUILTIN(__rv__kslra8, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kslra8, "eSceScULi", "", "experimental-p") +// kslra8_u +TARGET_BUILTIN(__rv__kslra8_u, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kslra8_u, "eSceScULi", "", "experimental-p") + +// kslra16 +TARGET_BUILTIN(__rv__kslra16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kslra16, "eSseSsULi", "", "experimental-p") +// kslra16_u +TARGET_BUILTIN(__rv__kslra16_u, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kslra16_u, "eSseSsULi", "", "experimental-p") + +// kslraw +TARGET_BUILTIN(__rv__kslraw, "LiLiLi", "", "experimental-p") +// kslraw_u +TARGET_BUILTIN(__rv__kslraw_u, "LiLiLi", "", "experimental-p") + +// kstas16 +TARGET_BUILTIN(__rv__kstas16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kstas16, "eSseSseSs", "", "experimental-p") + +// kstsa16 +TARGET_BUILTIN(__rv__kstsa16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kstsa16, "eSseSseSs", "", "experimental-p") + +// ksub8 +TARGET_BUILTIN(__rv__ksub8, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_ksub8, "eSceSceSc", "", "experimental-p") + +// ksub16 +TARGET_BUILTIN(__rv__ksub16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_ksub16, "eSseSseSs", "", "experimental-p") + +// ksubh +TARGET_BUILTIN(__rv__ksubh, "LiLiLi", "", "experimental-p") + +// ksubw +TARGET_BUILTIN(__rv__ksubw, "LiLiLi", "", "experimental-p") + +// kwmmul +TARGET_BUILTIN(__rv__kwmmul, "LiLiLi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kwmmul, "V2SiV2SiV2Si", "", "experimental-p") +// kwmmul_u +TARGET_BUILTIN(__rv__kwmmul_u, "LiLiLi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_kwmmul_u, "V2SiV2SiV2Si", "", "experimental-p") + +// maxw +TARGET_BUILTIN(__rv__maxw, "LiLiLi", "", "experimental-p") + +// minw +TARGET_BUILTIN(__rv__minw, "LiLiLi", "", "experimental-p") + +// pbsad +TARGET_BUILTIN(__rv__pbsad, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_pbsad, "ULieUceUc", "", "experimental-p") + +// pbsada +TARGET_BUILTIN(__rv__pbsada, "ULiULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_pbsada, "ULiULieUceUc", "", "experimental-p") + +// pkbb16 +TARGET_BUILTIN(__rv__pkbb16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_pkbb16, "eUseUseUs", "", 
"experimental-p") +// pkbt16 +TARGET_BUILTIN(__rv__pkbt16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_pkbt16, "eUseUseUs", "", "experimental-p") +// pktt16 +TARGET_BUILTIN(__rv__pktt16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_pktt16, "eUseUseUs", "", "experimental-p") +// pktb16 +TARGET_BUILTIN(__rv__pktb16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_pktb16, "eUseUseUs", "", "experimental-p") + +// radd8 +TARGET_BUILTIN(__rv__radd8, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_radd8, "eSceSceSc", "", "experimental-p") + +// radd16 +TARGET_BUILTIN(__rv__radd16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_radd16, "eSseSseSs", "", "experimental-p") + +// raddw +TARGET_BUILTIN(__rv__raddw, "LiLiLi", "", "experimental-p") + +// rcras16 +TARGET_BUILTIN(__rv__rcras16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_rcras16, "eSseSseSs", "", "experimental-p") + +// rcrsa16 +TARGET_BUILTIN(__rv__rcrsa16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_rcrsa16, "eSseSseSs", "", "experimental-p") + +// rstas16 +TARGET_BUILTIN(__rv__rstas16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_rstas16, "eSseSseSs", "", "experimental-p") + +// rstsa16 +TARGET_BUILTIN(__rv__rstsa16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_rstsa16, "eSseSseSs", "", "experimental-p") + +// rsub8 +TARGET_BUILTIN(__rv__rsub8, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_rsub8, "eSceSceSc", "", "experimental-p") + +// rsub16 +TARGET_BUILTIN(__rv__rsub16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_rsub16, "eSseSseSs", "", "experimental-p") + +// rsubw +TARGET_BUILTIN(__rv__rsubw, "LiLiLi", "", "experimental-p") + +// sclip8 +TARGET_BUILTIN(__rv__sclip8, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_sclip8, "eSceScULi", "", "experimental-p") + +// sclip16 +TARGET_BUILTIN(__rv__sclip16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_sclip16, "eSseSsULi", "", "experimental-p") + +// sclip32 +TARGET_BUILTIN(__rv__sclip32, "LiLiLi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_sclip32, "V2SiV2SiULi", "", "experimental-p") + +// scmple8 +TARGET_BUILTIN(__rv__scmple8, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_scmple8, "eUceSceSc", "", "experimental-p") + +// scmple16 +TARGET_BUILTIN(__rv__scmple16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_scmple16, "eUseSseSs", "", "experimental-p") + +// scmplt8 +TARGET_BUILTIN(__rv__scmplt8, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_scmplt8, "eUceSceSc", "", "experimental-p") + +// scmplt16 +TARGET_BUILTIN(__rv__scmplt16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_scmplt16, "eUseSseSs", "", "experimental-p") + +// sll8 +TARGET_BUILTIN(__rv__sll8, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_sll8, "eUceUcULi", "", "experimental-p") + +// sll16 +TARGET_BUILTIN(__rv__sll16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_sll16, "eUseUsULi", "", "experimental-p") + +// smaqa +TARGET_BUILTIN(__rv__smaqa, "LiLiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_smaqa, "eSieSieSceSc", "", "experimental-p") +// smaqa_su +TARGET_BUILTIN(__rv__smaqa_su, "LiLiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_smaqa_su, "eSieSieSceSc", "", "experimental-p") + +// smax8 +TARGET_BUILTIN(__rv__smax8, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_smax8, "eSceSceSc", "", "experimental-p") + +// smax16 
+TARGET_BUILTIN(__rv__smax16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_smax16, "eSseSseSs", "", "experimental-p") + +// smbb16 +TARGET_BUILTIN(__rv__smbb16, "LiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_smbb16, "eSieSseSs", "", "experimental-p") +// smbt16 +TARGET_BUILTIN(__rv__smbt16, "LiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_smbt16, "eSieSseSs", "", "experimental-p") +// smtt16 +TARGET_BUILTIN(__rv__smtt16, "LiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_smtt16, "eSieSseSs", "", "experimental-p") + +// smds +TARGET_BUILTIN(__rv__smds, "LiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_smds, "eSieSseSs", "", "experimental-p") +// smdrs +TARGET_BUILTIN(__rv__smdrs, "LiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_smdrs, "eSieSseSs", "", "experimental-p") +// smxds +TARGET_BUILTIN(__rv__smxds, "LiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_smxds, "eSieSseSs", "", "experimental-p") + +// smin8 +TARGET_BUILTIN(__rv__smin8, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_smin8, "eSceSceSc", "", "experimental-p") + +// smin16 +TARGET_BUILTIN(__rv__smin16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_smin16, "eSseSseSs", "", "experimental-p") + +// smmul +TARGET_BUILTIN(__rv__smmul, "LiLiLi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_smmul, "V2SiV2SiV2Si", "", "experimental-p") +// smmul_u +TARGET_BUILTIN(__rv__smmul_u, "LiLiLi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_smmul_u, "V2SiV2SiV2Si", "", "experimental-p") + +// smmwb +TARGET_BUILTIN(__rv__smmwb, "LiLiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_smmwb, "eSieSieSs", "", "experimental-p") +// smmwb_u +TARGET_BUILTIN(__rv__smmwb_u, "LiLiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_smmwb_u, "eSieSieSs", "", "experimental-p") + +// smmwt +TARGET_BUILTIN(__rv__smmwt, "LiLiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_smmwt, "eSieSieSs", "", "experimental-p") +// smmwt_u +TARGET_BUILTIN(__rv__smmwt_u, "LiLiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_smmwt_u, "eSieSieSs", "", "experimental-p") + +// sra_u +TARGET_BUILTIN(__rv__sra_u, "LiLiULi", "", "experimental-p") + +// sra8 +TARGET_BUILTIN(__rv__sra8, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_sra8, "eSceScULi", "", "experimental-p") +// sra8_u +TARGET_BUILTIN(__rv__sra8_u, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_sra8_u, "eSceScULi", "", "experimental-p") + +// sra16 +TARGET_BUILTIN(__rv__sra16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_sra16, "eSseSsULi", "", "experimental-p") +// sra16_u +TARGET_BUILTIN(__rv__sra16_u, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_sra16_u, "eSseSsULi", "", "experimental-p") + +// srl8 +TARGET_BUILTIN(__rv__srl8, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_srl8, "eUceUcULi", "", "experimental-p") +// srl8_u +TARGET_BUILTIN(__rv__srl8_u, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_srl8_u, "eUceUcULi", "", "experimental-p") + +// srl16 +TARGET_BUILTIN(__rv__srl16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_srl16, "eUseUsULi", "", "experimental-p") +// srl16_u +TARGET_BUILTIN(__rv__srl16_u, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_srl16_u, "eUseUsULi", "", "experimental-p") + +// stas16 +TARGET_BUILTIN(__rv__stas16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_ustas16, "eUseUseUs", "", "experimental-p") +TARGET_BUILTIN(__rv__v_sstas16, "eSseSseSs", "", "experimental-p") + +// 
stsa16 +TARGET_BUILTIN(__rv__stsa16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_ustsa16, "eUseUseUs", "", "experimental-p") +TARGET_BUILTIN(__rv__v_sstsa16, "eSseSseSs", "", "experimental-p") + +// sub8 +TARGET_BUILTIN(__rv__sub8, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_usub8, "eUceUceUc", "", "experimental-p") +TARGET_BUILTIN(__rv__v_ssub8, "eSceSceSc", "", "experimental-p") + +// sub16 +TARGET_BUILTIN(__rv__sub16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_usub16, "eUseUseUs", "", "experimental-p") +TARGET_BUILTIN(__rv__v_ssub16, "eSseSseSs", "", "experimental-p") + +// sunpkd810 +TARGET_BUILTIN(__rv__sunpkd810, "ULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_sunpkd810, "eSseSc", "", "experimental-p") +// sunpkd820 +TARGET_BUILTIN(__rv__sunpkd820, "ULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_sunpkd820, "eSseSc", "", "experimental-p") +// sunpkd830 +TARGET_BUILTIN(__rv__sunpkd830, "ULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_sunpkd830, "eSseSc", "", "experimental-p") +// sunpkd831 +TARGET_BUILTIN(__rv__sunpkd831, "ULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_sunpkd831, "eSseSc", "", "experimental-p") +// sunpkd832 +TARGET_BUILTIN(__rv__sunpkd832, "ULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_sunpkd832, "eSseSc", "", "experimental-p") + +// swap8 +TARGET_BUILTIN(__rv__swap8, "ULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_swap8, "eUceSc", "", "experimental-p") + +// swap16 +TARGET_BUILTIN(__rv__swap16, "ULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_swap16, "eUseSs", "", "experimental-p") + +// uclip8 +TARGET_BUILTIN(__rv__uclip8, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_uclip8, "eSceScULi", "", "experimental-p") + +// uclip16 +TARGET_BUILTIN(__rv__uclip16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_uclip16, "eSseSsULi", "", "experimental-p") + +// uclip32 +TARGET_BUILTIN(__rv__uclip32, "LiLiLi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_uclip32, "V2SiV2SiULi", "", "experimental-p") + +// ucmple8 +TARGET_BUILTIN(__rv__ucmple8, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_ucmple8, "eUceSceSc", "", "experimental-p") + +// ucmple16 +TARGET_BUILTIN(__rv__ucmple16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_ucmple16, "eUseSseSs", "", "experimental-p") + +// ucmplt8 +TARGET_BUILTIN(__rv__ucmplt8, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_ucmplt8, "eUceSceSc", "", "experimental-p") + +// ucmplt16 +TARGET_BUILTIN(__rv__ucmplt16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_ucmplt16, "eUseSseSs", "", "experimental-p") + +// ukadd8 +TARGET_BUILTIN(__rv__ukadd8, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_ukadd8, "eUceUceUc", "", "experimental-p") + +// ukadd16 +TARGET_BUILTIN(__rv__ukadd16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_ukadd16, "eUseUseUs", "", "experimental-p") + +// ukaddh +TARGET_BUILTIN(__rv__ukaddh, "ULiULiULi", "", "experimental-p") + +// ukaddw +TARGET_BUILTIN(__rv__ukaddw, "ULiULiULi", "", "experimental-p") + +// ukcras16 +TARGET_BUILTIN(__rv__ukcras16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_ukcras16, "eUseUseUs", "", "experimental-p") + +// ukcrsa16 +TARGET_BUILTIN(__rv__ukcrsa16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_ukcrsa16, "eUseUseUs", "", "experimental-p") + +// ukstas16 +TARGET_BUILTIN(__rv__ukstas16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_ukstas16, "eSseSseSs", "", 
"experimental-p") + +// ukstsa16 +TARGET_BUILTIN(__rv__ukstsa16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_ukstsa16, "eSseSseSs", "", "experimental-p") + +// uksub8 +TARGET_BUILTIN(__rv__uksub8, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_uksub8, "eSceSceSc", "", "experimental-p") + +// uksub16 +TARGET_BUILTIN(__rv__uksub16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_uksub16, "eSseSseSs", "", "experimental-p") + +// uksubh +TARGET_BUILTIN(__rv__uksubh, "LiLiLi", "", "experimental-p") + +// uksubw +TARGET_BUILTIN(__rv__uksubw, "LiLiLi", "", "experimental-p") + +// umaqa +TARGET_BUILTIN(__rv__umaqa, "ULiULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_umaqa, "eUieUieUceUc", "", "experimental-p") + +// umax8 +TARGET_BUILTIN(__rv__umax8, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_umax8, "eSceSceSc", "", "experimental-p") + +// umax16 +TARGET_BUILTIN(__rv__umax16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_umax16, "eSseSseSs", "", "experimental-p") + +// umin8 +TARGET_BUILTIN(__rv__umin8, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_umin8, "eSceSceSc", "", "experimental-p") + +// umin16 +TARGET_BUILTIN(__rv__umin16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_umin16, "eSseSseSs", "", "experimental-p") + +// uradd8 +TARGET_BUILTIN(__rv__uradd8, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_uradd8, "eSceSceSc", "", "experimental-p") + +// uradd16 +TARGET_BUILTIN(__rv__uradd16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_uradd16, "eSseSseSs", "", "experimental-p") + +// uraddw +TARGET_BUILTIN(__rv__uraddw, "LiLiLi", "", "experimental-p") + +// urcras16 +TARGET_BUILTIN(__rv__urcras16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_urcras16, "eSseSseSs", "", "experimental-p") + +// urcrsa16 +TARGET_BUILTIN(__rv__urcrsa16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_urcrsa16, "eSseSseSs", "", "experimental-p") + +// urstas16 +TARGET_BUILTIN(__rv__urstas16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_urstas16, "eSseSseSs", "", "experimental-p") + +// urstsa16 +TARGET_BUILTIN(__rv__urstsa16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_urstsa16, "eSseSseSs", "", "experimental-p") + +// ursub8 +TARGET_BUILTIN(__rv__ursub8, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_ursub8, "eSceSceSc", "", "experimental-p") + +// ursub16 +TARGET_BUILTIN(__rv__ursub16, "ULiULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_ursub16, "eSseSseSs", "", "experimental-p") + +// ursubw +TARGET_BUILTIN(__rv__ursubw, "LiLiLi", "", "experimental-p") + +// zunpkd810 +TARGET_BUILTIN(__rv__zunpkd810, "ULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_zunpkd810, "eSseSc", "", "experimental-p") +// zunpkd820 +TARGET_BUILTIN(__rv__zunpkd820, "ULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_zunpkd820, "eSseSc", "", "experimental-p") +// zunpkd830 +TARGET_BUILTIN(__rv__zunpkd830, "ULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_zunpkd830, "eSseSc", "", "experimental-p") +// zunpkd831 +TARGET_BUILTIN(__rv__zunpkd831, "ULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_zunpkd831, "eSseSc", "", "experimental-p") +// zunpkd832 +TARGET_BUILTIN(__rv__zunpkd832, "ULiULi", "", "experimental-p") +TARGET_BUILTIN(__rv__v_zunpkd832, "eSseSc", "", "experimental-p") + #undef BUILTIN #undef TARGET_BUILTIN diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp --- a/clang/lib/AST/ASTContext.cpp 
+++ b/clang/lib/AST/ASTContext.cpp @@ -4037,6 +4037,44 @@ return QualType(New, 0); } +/// getRegisterSizedVectorType - Return the unique reference to a vector type +/// of the specified element type whose total width matches the target's +/// register size. VecType must be a built-in type. +QualType +ASTContext::getRegisterSizedVectorType(QualType VecType, + VectorType::VectorKind VecKind) const { + assert(VecType->isBuiltinType()); + + unsigned NumElts = getTargetInfo().getRegisterWidth() / getTypeSize(VecType); + + if (NumElts <= 1) + return VecType; + + // Check if we've already instantiated a vector of this type. + llvm::FoldingSetNodeID ID; + VectorType::Profile(ID, VecType, NumElts, Type::Vector, VecKind); + + void *InsertPos = nullptr; + if (VectorType *VTP = VectorTypes.FindNodeOrInsertPos(ID, InsertPos)) + return QualType(VTP, 0); + + // If the element type isn't canonical, this won't be a canonical type either, + // so fill in the canonical type field. + QualType Canonical; + if (!VecType.isCanonical()) { + Canonical = getVectorType(getCanonicalType(VecType), NumElts, VecKind); + + // Get the new insert position for the node we care about. + VectorType *NewIP = VectorTypes.FindNodeOrInsertPos(ID, InsertPos); + assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP; + } + auto *New = new (*this, TypeAlignment) + VectorType(VecType, NumElts, Canonical, VecKind); + VectorTypes.InsertNode(New, InsertPos); + Types.push_back(New); + return QualType(New, 0); +} + QualType ASTContext::getConstantMatrixType(QualType ElementTy, unsigned NumRows, unsigned NumColumns) const { llvm::FoldingSetNodeID ID; @@ -10411,6 +10449,15 @@ Type = Context.getExtVectorType(ElementType, NumElements); break; } + case 'e': { + QualType ElementType = DecodeTypeFromStr(Str, Context, Error, + RequiresICE, false); + assert(!RequiresICE && "Can't require vector ICE"); + + Type = Context.getRegisterSizedVectorType(ElementType, + VectorType::GenericVector); + break; + } case 'X': { QualType ElementType = DecodeTypeFromStr(Str, Context, Error, RequiresICE, false); diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -17940,6 +17940,649 @@ IntrinsicTypes = {ResultType}; break; } + + // P extension +#define BUILTIN_ID(NAME) \ + case RISCV::BI__rv__##NAME: \ + ID = Intrinsic::riscv_##NAME; \ + break; + +#define BUILTIN_ID_WITH_V(NAME) \ + case RISCV::BI__rv__##NAME: \ + case RISCV::BI__rv__v_##NAME: \ + ID = Intrinsic::riscv_##NAME; \ + break; + +#define BUILTIN_ID_WITH_USV(NAME) \ + case RISCV::BI__rv__##NAME: \ + case RISCV::BI__rv__v_u##NAME: \ + case RISCV::BI__rv__v_s##NAME: \ + ID = Intrinsic::riscv_##NAME; \ + break; + + // Intrinsic type is obtained from Ops[0].
+ case RISCV::BI__rv__kabsw: + case RISCV::BI__rv__ave: + case RISCV::BI__rv__bitrev: + case RISCV::BI__rv__kaddh: + case RISCV::BI__rv__kaddw: + case RISCV::BI__rv__ksllw: + case RISCV::BI__rv__kslraw: + case RISCV::BI__rv__kslraw_u: + case RISCV::BI__rv__ksubh: + case RISCV::BI__rv__ksubw: + case RISCV::BI__rv__maxw: + case RISCV::BI__rv__minw: + case RISCV::BI__rv__raddw: + case RISCV::BI__rv__rsubw: + case RISCV::BI__rv__ukaddh: + case RISCV::BI__rv__ukaddw: + case RISCV::BI__rv__uksubh: + case RISCV::BI__rv__uksubw: + case RISCV::BI__rv__uraddw: + case RISCV::BI__rv__ursubw: + case RISCV::BI__rv__bpick: + case RISCV::BI__rv__insb: + + case RISCV::BI__rv__clrs8: + case RISCV::BI__rv__v_clrs8: + case RISCV::BI__rv__clrs16: + case RISCV::BI__rv__v_clrs16: + case RISCV::BI__rv__clrs32: + case RISCV::BI__rv__v_clrs32: + case RISCV::BI__rv__clo8: + case RISCV::BI__rv__v_clo8: + case RISCV::BI__rv__clo16: + case RISCV::BI__rv__v_clo16: + case RISCV::BI__rv__clo32: + case RISCV::BI__rv__v_clo32: + case RISCV::BI__rv__clz8: + case RISCV::BI__rv__v_clz8: + case RISCV::BI__rv__clz16: + case RISCV::BI__rv__v_clz16: + case RISCV::BI__rv__clz32: + case RISCV::BI__rv__v_clz32: + case RISCV::BI__rv__kabs8: + case RISCV::BI__rv__v_kabs8: + case RISCV::BI__rv__kabs16: + case RISCV::BI__rv__v_kabs16: + case RISCV::BI__rv__swap8: + case RISCV::BI__rv__v_swap8: + case RISCV::BI__rv__swap16: + case RISCV::BI__rv__v_swap16: + case RISCV::BI__rv__kadd8: + case RISCV::BI__rv__v_kadd8: + case RISCV::BI__rv__kadd16: + case RISCV::BI__rv__v_kadd16: + case RISCV::BI__rv__kcras16: + case RISCV::BI__rv__v_kcras16: + case RISCV::BI__rv__kcrsa16: + case RISCV::BI__rv__v_kcrsa16: + case RISCV::BI__rv__khm8: + case RISCV::BI__rv__v_khm8: + case RISCV::BI__rv__khmx8: + case RISCV::BI__rv__v_khmx8: + case RISCV::BI__rv__khm16: + case RISCV::BI__rv__v_khm16: + case RISCV::BI__rv__khmx16: + case RISCV::BI__rv__v_khmx16: + case RISCV::BI__rv__kstas16: + case RISCV::BI__rv__v_kstas16: + case RISCV::BI__rv__kstsa16: + case RISCV::BI__rv__v_kstsa16: + case RISCV::BI__rv__ksub8: + case RISCV::BI__rv__v_ksub8: + case RISCV::BI__rv__ksub16: + case RISCV::BI__rv__v_ksub16: + case RISCV::BI__rv__kwmmul: + case RISCV::BI__rv__v_kwmmul: + case RISCV::BI__rv__kwmmul_u: + case RISCV::BI__rv__v_kwmmul_u: + case RISCV::BI__rv__pkbb16: + case RISCV::BI__rv__v_pkbb16: + case RISCV::BI__rv__pkbt16: + case RISCV::BI__rv__v_pkbt16: + case RISCV::BI__rv__pktt16: + case RISCV::BI__rv__v_pktt16: + case RISCV::BI__rv__pktb16: + case RISCV::BI__rv__v_pktb16: + case RISCV::BI__rv__radd8: + case RISCV::BI__rv__v_radd8: + case RISCV::BI__rv__radd16: + case RISCV::BI__rv__v_radd16: + case RISCV::BI__rv__rcras16: + case RISCV::BI__rv__v_rcras16: + case RISCV::BI__rv__rcrsa16: + case RISCV::BI__rv__v_rcrsa16: + case RISCV::BI__rv__rstas16: + case RISCV::BI__rv__v_rstas16: + case RISCV::BI__rv__rstsa16: + case RISCV::BI__rv__v_rstsa16: + case RISCV::BI__rv__rsub8: + case RISCV::BI__rv__v_rsub8: + case RISCV::BI__rv__rsub16: + case RISCV::BI__rv__v_rsub16: + case RISCV::BI__rv__scmple8: + case RISCV::BI__rv__v_scmple8: + case RISCV::BI__rv__scmple16: + case RISCV::BI__rv__v_scmple16: + case RISCV::BI__rv__scmplt8: + case RISCV::BI__rv__v_scmplt8: + case RISCV::BI__rv__scmplt16: + case RISCV::BI__rv__v_scmplt16: + case RISCV::BI__rv__smax8: + case RISCV::BI__rv__v_smax8: + case RISCV::BI__rv__smax16: + case RISCV::BI__rv__v_smax16: + case RISCV::BI__rv__smin8: + case RISCV::BI__rv__v_smin8: + case RISCV::BI__rv__smin16: + case RISCV::BI__rv__v_smin16: + 
case RISCV::BI__rv__smmul: + case RISCV::BI__rv__v_smmul: + case RISCV::BI__rv__smmul_u: + case RISCV::BI__rv__v_smmul_u: + case RISCV::BI__rv__ucmple8: + case RISCV::BI__rv__v_ucmple8: + case RISCV::BI__rv__ucmple16: + case RISCV::BI__rv__v_ucmple16: + case RISCV::BI__rv__ucmplt8: + case RISCV::BI__rv__v_ucmplt8: + case RISCV::BI__rv__ucmplt16: + case RISCV::BI__rv__v_ucmplt16: + case RISCV::BI__rv__ukadd8: + case RISCV::BI__rv__v_ukadd8: + case RISCV::BI__rv__ukadd16: + case RISCV::BI__rv__v_ukadd16: + case RISCV::BI__rv__ukcras16: + case RISCV::BI__rv__v_ukcras16: + case RISCV::BI__rv__ukcrsa16: + case RISCV::BI__rv__v_ukcrsa16: + case RISCV::BI__rv__ukstas16: + case RISCV::BI__rv__v_ukstas16: + case RISCV::BI__rv__ukstsa16: + case RISCV::BI__rv__v_ukstsa16: + case RISCV::BI__rv__uksub8: + case RISCV::BI__rv__v_uksub8: + case RISCV::BI__rv__uksub16: + case RISCV::BI__rv__v_uksub16: + case RISCV::BI__rv__umax8: + case RISCV::BI__rv__v_umax8: + case RISCV::BI__rv__umax16: + case RISCV::BI__rv__v_umax16: + case RISCV::BI__rv__umin8: + case RISCV::BI__rv__v_umin8: + case RISCV::BI__rv__umin16: + case RISCV::BI__rv__v_umin16: + case RISCV::BI__rv__uradd8: + case RISCV::BI__rv__v_uradd8: + case RISCV::BI__rv__uradd16: + case RISCV::BI__rv__v_uradd16: + case RISCV::BI__rv__urcras16: + case RISCV::BI__rv__v_urcras16: + case RISCV::BI__rv__urcrsa16: + case RISCV::BI__rv__v_urcrsa16: + case RISCV::BI__rv__urstas16: + case RISCV::BI__rv__v_urstas16: + case RISCV::BI__rv__urstsa16: + case RISCV::BI__rv__v_urstsa16: + case RISCV::BI__rv__ursub8: + case RISCV::BI__rv__v_ursub8: + case RISCV::BI__rv__ursub16: + case RISCV::BI__rv__v_ursub16: + case RISCV::BI__rv__kmmac: + case RISCV::BI__rv__v_kmmac: + case RISCV::BI__rv__kmmac_u: + case RISCV::BI__rv__v_kmmac_u: + case RISCV::BI__rv__kmmsb: + case RISCV::BI__rv__v_kmmsb: + case RISCV::BI__rv__kmmsb_u: + case RISCV::BI__rv__v_kmmsb_u: + + case RISCV::BI__rv__add8: + case RISCV::BI__rv__v_uadd8: + case RISCV::BI__rv__v_sadd8: + case RISCV::BI__rv__add16: + case RISCV::BI__rv__v_uadd16: + case RISCV::BI__rv__v_sadd16: + case RISCV::BI__rv__cmpeq8: + case RISCV::BI__rv__v_ucmpeq8: + case RISCV::BI__rv__v_scmpeq8: + case RISCV::BI__rv__cmpeq16: + case RISCV::BI__rv__v_ucmpeq16: + case RISCV::BI__rv__v_scmpeq16: + case RISCV::BI__rv__cras16: + case RISCV::BI__rv__v_ucras16: + case RISCV::BI__rv__v_scras16: + case RISCV::BI__rv__crsa16: + case RISCV::BI__rv__v_ucrsa16: + case RISCV::BI__rv__v_scrsa16: + case RISCV::BI__rv__stas16: + case RISCV::BI__rv__v_ustas16: + case RISCV::BI__rv__v_sstas16: + case RISCV::BI__rv__stsa16: + case RISCV::BI__rv__v_ustsa16: + case RISCV::BI__rv__v_sstsa16: + case RISCV::BI__rv__sub8: + case RISCV::BI__rv__v_usub8: + case RISCV::BI__rv__v_ssub8: + case RISCV::BI__rv__sub16: + case RISCV::BI__rv__v_usub16: + case RISCV::BI__rv__v_ssub16: { + switch (BuiltinID) { + default: llvm_unreachable("unexpected builtin ID"); + BUILTIN_ID(kabsw) + BUILTIN_ID(ave) + BUILTIN_ID(bitrev) + BUILTIN_ID(kaddh) + BUILTIN_ID(kaddw) + BUILTIN_ID(ksllw) + BUILTIN_ID(kslraw) + BUILTIN_ID(kslraw_u) + BUILTIN_ID(ksubh) + BUILTIN_ID(ksubw) + BUILTIN_ID(maxw) + BUILTIN_ID(minw) + BUILTIN_ID(raddw) + BUILTIN_ID(rsubw) + BUILTIN_ID(ukaddh) + BUILTIN_ID(ukaddw) + BUILTIN_ID(uksubh) + BUILTIN_ID(uksubw) + BUILTIN_ID(uraddw) + BUILTIN_ID(ursubw) + BUILTIN_ID(bpick) + BUILTIN_ID(insb) + + BUILTIN_ID_WITH_V(clrs8) + BUILTIN_ID_WITH_V(clrs16) + BUILTIN_ID_WITH_V(clrs32) + BUILTIN_ID_WITH_V(clo8) + BUILTIN_ID_WITH_V(clo16) + BUILTIN_ID_WITH_V(clo32) + 
BUILTIN_ID_WITH_V(clz8) + BUILTIN_ID_WITH_V(clz16) + BUILTIN_ID_WITH_V(clz32) + BUILTIN_ID_WITH_V(kabs8) + BUILTIN_ID_WITH_V(kabs16) + BUILTIN_ID_WITH_V(swap8) + BUILTIN_ID_WITH_V(swap16) + BUILTIN_ID_WITH_V(kadd8) + BUILTIN_ID_WITH_V(kadd16) + BUILTIN_ID_WITH_V(kcras16) + BUILTIN_ID_WITH_V(kcrsa16) + BUILTIN_ID_WITH_V(khm8) + BUILTIN_ID_WITH_V(khmx8) + BUILTIN_ID_WITH_V(khm16) + BUILTIN_ID_WITH_V(khmx16) + BUILTIN_ID_WITH_V(kstas16) + BUILTIN_ID_WITH_V(kstsa16) + BUILTIN_ID_WITH_V(ksub8) + BUILTIN_ID_WITH_V(ksub16) + BUILTIN_ID_WITH_V(kwmmul) + BUILTIN_ID_WITH_V(kwmmul_u) + BUILTIN_ID_WITH_V(pkbb16) + BUILTIN_ID_WITH_V(pkbt16) + BUILTIN_ID_WITH_V(pktt16) + BUILTIN_ID_WITH_V(pktb16) + BUILTIN_ID_WITH_V(radd8) + BUILTIN_ID_WITH_V(radd16) + BUILTIN_ID_WITH_V(rcras16) + BUILTIN_ID_WITH_V(rcrsa16) + BUILTIN_ID_WITH_V(rstas16) + BUILTIN_ID_WITH_V(rstsa16) + BUILTIN_ID_WITH_V(rsub8) + BUILTIN_ID_WITH_V(rsub16) + BUILTIN_ID_WITH_V(scmple8) + BUILTIN_ID_WITH_V(scmple16) + BUILTIN_ID_WITH_V(scmplt8) + BUILTIN_ID_WITH_V(scmplt16) + BUILTIN_ID_WITH_V(smax8) + BUILTIN_ID_WITH_V(smax16) + BUILTIN_ID_WITH_V(smin8) + BUILTIN_ID_WITH_V(smin16) + BUILTIN_ID_WITH_V(smmul) + BUILTIN_ID_WITH_V(smmul_u) + BUILTIN_ID_WITH_V(ucmple8) + BUILTIN_ID_WITH_V(ucmple16) + BUILTIN_ID_WITH_V(ucmplt8) + BUILTIN_ID_WITH_V(ucmplt16) + BUILTIN_ID_WITH_V(ukadd8) + BUILTIN_ID_WITH_V(ukadd16) + BUILTIN_ID_WITH_V(ukcras16) + BUILTIN_ID_WITH_V(ukcrsa16) + BUILTIN_ID_WITH_V(ukstas16) + BUILTIN_ID_WITH_V(ukstsa16) + BUILTIN_ID_WITH_V(uksub8) + BUILTIN_ID_WITH_V(uksub16) + BUILTIN_ID_WITH_V(umax8) + BUILTIN_ID_WITH_V(umax16) + BUILTIN_ID_WITH_V(umin8) + BUILTIN_ID_WITH_V(umin16) + BUILTIN_ID_WITH_V(uradd8) + BUILTIN_ID_WITH_V(uradd16) + BUILTIN_ID_WITH_V(urcras16) + BUILTIN_ID_WITH_V(urcrsa16) + BUILTIN_ID_WITH_V(urstas16) + BUILTIN_ID_WITH_V(urstsa16) + BUILTIN_ID_WITH_V(ursub8) + BUILTIN_ID_WITH_V(ursub16) + BUILTIN_ID_WITH_V(kmmac) + BUILTIN_ID_WITH_V(kmmac_u) + BUILTIN_ID_WITH_V(kmmsb) + BUILTIN_ID_WITH_V(kmmsb_u) + + BUILTIN_ID_WITH_USV(add8) + BUILTIN_ID_WITH_USV(add16) + BUILTIN_ID_WITH_USV(cmpeq8) + BUILTIN_ID_WITH_USV(cmpeq16) + BUILTIN_ID_WITH_USV(cras16) + BUILTIN_ID_WITH_USV(crsa16) + BUILTIN_ID_WITH_USV(stas16) + BUILTIN_ID_WITH_USV(stsa16) + BUILTIN_ID_WITH_USV(sub8) + BUILTIN_ID_WITH_USV(sub16) + } + + IntrinsicTypes = {Ops[0]->getType()}; + break; + } + + // Intrinsic type is obtained from ResultType. + case RISCV::BI__rv__sunpkd810: + case RISCV::BI__rv__v_sunpkd810: + case RISCV::BI__rv__sunpkd820: + case RISCV::BI__rv__v_sunpkd820: + case RISCV::BI__rv__sunpkd830: + case RISCV::BI__rv__v_sunpkd830: + case RISCV::BI__rv__sunpkd831: + case RISCV::BI__rv__v_sunpkd831: + case RISCV::BI__rv__sunpkd832: + case RISCV::BI__rv__v_sunpkd832: + case RISCV::BI__rv__zunpkd810: + case RISCV::BI__rv__v_zunpkd810: + case RISCV::BI__rv__zunpkd820: + case RISCV::BI__rv__v_zunpkd820: + case RISCV::BI__rv__zunpkd830: + case RISCV::BI__rv__v_zunpkd830: + case RISCV::BI__rv__zunpkd831: + case RISCV::BI__rv__v_zunpkd831: + case RISCV::BI__rv__zunpkd832: + case RISCV::BI__rv__v_zunpkd832: { + switch (BuiltinID) { + default: llvm_unreachable("unexpected builtin ID"); + // Unpacking builtins have two kinds of intrinsics: in one, the result + // and operands are the same scalar type; in the other, the operand's + // elements are half the width of the result's elements but there are + // twice as many of them.
+ BUILTIN_ID(sunpkd810) + BUILTIN_ID(v_sunpkd810) + BUILTIN_ID(sunpkd820) + BUILTIN_ID(v_sunpkd820) + BUILTIN_ID(sunpkd830) + BUILTIN_ID(v_sunpkd830) + BUILTIN_ID(sunpkd831) + BUILTIN_ID(v_sunpkd831) + BUILTIN_ID(sunpkd832) + BUILTIN_ID(v_sunpkd832) + BUILTIN_ID(zunpkd810) + BUILTIN_ID(v_zunpkd810) + BUILTIN_ID(zunpkd820) + BUILTIN_ID(v_zunpkd820) + BUILTIN_ID(zunpkd830) + BUILTIN_ID(v_zunpkd830) + BUILTIN_ID(zunpkd831) + BUILTIN_ID(v_zunpkd831) + BUILTIN_ID(zunpkd832) + BUILTIN_ID(v_zunpkd832) + } + + IntrinsicTypes = {ResultType}; + break; + } + + // Intrinsic type is obtained from ResultType and Ops[0]. + case RISCV::BI__rv__kdmbb: + case RISCV::BI__rv__v_kdmbb: + case RISCV::BI__rv__kdmbt: + case RISCV::BI__rv__v_kdmbt: + case RISCV::BI__rv__kdmtt: + case RISCV::BI__rv__v_kdmtt: + case RISCV::BI__rv__khmbb: + case RISCV::BI__rv__v_khmbb: + case RISCV::BI__rv__khmbt: + case RISCV::BI__rv__v_khmbt: + case RISCV::BI__rv__khmtt: + case RISCV::BI__rv__v_khmtt: + case RISCV::BI__rv__kmda: + case RISCV::BI__rv__v_kmda: + case RISCV::BI__rv__kmxda: + case RISCV::BI__rv__v_kmxda: + case RISCV::BI__rv__pbsad: + case RISCV::BI__rv__v_pbsad: + case RISCV::BI__rv__smbb16: + case RISCV::BI__rv__v_smbb16: + case RISCV::BI__rv__smbt16: + case RISCV::BI__rv__v_smbt16: + case RISCV::BI__rv__smtt16: + case RISCV::BI__rv__v_smtt16: + case RISCV::BI__rv__smds: + case RISCV::BI__rv__v_smds: + case RISCV::BI__rv__smdrs: + case RISCV::BI__rv__v_smdrs: + case RISCV::BI__rv__smxds: + case RISCV::BI__rv__v_smxds: { + switch (BuiltinID) { + default: llvm_unreachable("unexpected builtin ID"); + BUILTIN_ID_WITH_V(kdmbb) + BUILTIN_ID_WITH_V(kdmbt) + BUILTIN_ID_WITH_V(kdmtt) + BUILTIN_ID_WITH_V(khmbb) + BUILTIN_ID_WITH_V(khmbt) + BUILTIN_ID_WITH_V(khmtt) + BUILTIN_ID_WITH_V(kmda) + BUILTIN_ID_WITH_V(kmxda) + BUILTIN_ID_WITH_V(pbsad) + BUILTIN_ID_WITH_V(smbb16) + BUILTIN_ID_WITH_V(smbt16) + BUILTIN_ID_WITH_V(smtt16) + BUILTIN_ID_WITH_V(smds) + BUILTIN_ID_WITH_V(smdrs) + BUILTIN_ID_WITH_V(smxds) + } + + IntrinsicTypes = {ResultType, Ops[0]->getType()}; + break; + } + + // Intrinsic type is obtained from ResultType and Ops[1]. 
+ case RISCV::BI__rv__sra_u: + case RISCV::BI__rv__kmmwb2: + case RISCV::BI__rv__v_kmmwb2: + case RISCV::BI__rv__kmmwb2_u: + case RISCV::BI__rv__v_kmmwb2_u: + case RISCV::BI__rv__kmmwt2: + case RISCV::BI__rv__v_kmmwt2: + case RISCV::BI__rv__kmmwt2_u: + case RISCV::BI__rv__v_kmmwt2_u: + case RISCV::BI__rv__smmwb: + case RISCV::BI__rv__v_smmwb: + case RISCV::BI__rv__smmwb_u: + case RISCV::BI__rv__v_smmwb_u: + case RISCV::BI__rv__smmwt: + case RISCV::BI__rv__v_smmwt: + case RISCV::BI__rv__smmwt_u: + case RISCV::BI__rv__v_smmwt_u: + case RISCV::BI__rv__ksll8: + case RISCV::BI__rv__v_ksll8: + case RISCV::BI__rv__ksll16: + case RISCV::BI__rv__v_ksll16: + case RISCV::BI__rv__kslra8: + case RISCV::BI__rv__v_kslra8: + case RISCV::BI__rv__kslra8_u: + case RISCV::BI__rv__v_kslra8_u: + case RISCV::BI__rv__kslra16: + case RISCV::BI__rv__v_kslra16: + case RISCV::BI__rv__kslra16_u: + case RISCV::BI__rv__v_kslra16_u: + case RISCV::BI__rv__sclip8: + case RISCV::BI__rv__v_sclip8: + case RISCV::BI__rv__sclip16: + case RISCV::BI__rv__v_sclip16: + case RISCV::BI__rv__sclip32: + case RISCV::BI__rv__v_sclip32: + case RISCV::BI__rv__sll8: + case RISCV::BI__rv__v_sll8: + case RISCV::BI__rv__sll16: + case RISCV::BI__rv__v_sll16: + case RISCV::BI__rv__sra8: + case RISCV::BI__rv__v_sra8: + case RISCV::BI__rv__sra8_u: + case RISCV::BI__rv__v_sra8_u: + case RISCV::BI__rv__sra16: + case RISCV::BI__rv__v_sra16: + case RISCV::BI__rv__sra16_u: + case RISCV::BI__rv__v_sra16_u: + case RISCV::BI__rv__srl8: + case RISCV::BI__rv__v_srl8: + case RISCV::BI__rv__srl8_u: + case RISCV::BI__rv__v_srl8_u: + case RISCV::BI__rv__srl16: + case RISCV::BI__rv__v_srl16: + case RISCV::BI__rv__srl16_u: + case RISCV::BI__rv__v_srl16_u: + case RISCV::BI__rv__uclip8: + case RISCV::BI__rv__v_uclip8: + case RISCV::BI__rv__uclip16: + case RISCV::BI__rv__v_uclip16: + case RISCV::BI__rv__uclip32: + case RISCV::BI__rv__v_uclip32: + case RISCV::BI__rv__kdmabb: + case RISCV::BI__rv__v_kdmabb: + case RISCV::BI__rv__kdmabt: + case RISCV::BI__rv__v_kdmabt: + case RISCV::BI__rv__kdmatt: + case RISCV::BI__rv__v_kdmatt: + case RISCV::BI__rv__kmabb: + case RISCV::BI__rv__v_kmabb: + case RISCV::BI__rv__kmabt: + case RISCV::BI__rv__v_kmabt: + case RISCV::BI__rv__kmatt: + case RISCV::BI__rv__v_kmatt: + case RISCV::BI__rv__kmada: + case RISCV::BI__rv__v_kmada: + case RISCV::BI__rv__kmaxda: + case RISCV::BI__rv__v_kmaxda: + case RISCV::BI__rv__kmads: + case RISCV::BI__rv__v_kmads: + case RISCV::BI__rv__kmadrs: + case RISCV::BI__rv__v_kmadrs: + case RISCV::BI__rv__kmaxds: + case RISCV::BI__rv__v_kmaxds: + case RISCV::BI__rv__kmsda: + case RISCV::BI__rv__v_kmsda: + case RISCV::BI__rv__kmsxda: + case RISCV::BI__rv__v_kmsxda: + case RISCV::BI__rv__pbsada: + case RISCV::BI__rv__v_pbsada: + case RISCV::BI__rv__smaqa: + case RISCV::BI__rv__v_smaqa: + case RISCV::BI__rv__smaqa_su: + case RISCV::BI__rv__v_smaqa_su: + case RISCV::BI__rv__umaqa: + case RISCV::BI__rv__v_umaqa: { + switch (BuiltinID) { + default: llvm_unreachable("unexpected builtin ID"); + BUILTIN_ID(sra_u) + BUILTIN_ID_WITH_V(kmmwb2) + BUILTIN_ID_WITH_V(kmmwb2_u) + BUILTIN_ID_WITH_V(kmmwt2) + BUILTIN_ID_WITH_V(kmmwt2_u) + BUILTIN_ID_WITH_V(smmwb) + BUILTIN_ID_WITH_V(smmwb_u) + BUILTIN_ID_WITH_V(smmwt) + BUILTIN_ID_WITH_V(smmwt_u) + BUILTIN_ID_WITH_V(ksll8) + BUILTIN_ID_WITH_V(ksll16) + BUILTIN_ID_WITH_V(kslra8) + BUILTIN_ID_WITH_V(kslra8_u) + BUILTIN_ID_WITH_V(kslra16) + BUILTIN_ID_WITH_V(kslra16_u) + BUILTIN_ID_WITH_V(sclip8) + BUILTIN_ID_WITH_V(sclip16) + BUILTIN_ID_WITH_V(sclip32) + BUILTIN_ID_WITH_V(sll8) 
+ BUILTIN_ID_WITH_V(sll16) + BUILTIN_ID_WITH_V(sra8) + BUILTIN_ID_WITH_V(sra8_u) + BUILTIN_ID_WITH_V(sra16) + BUILTIN_ID_WITH_V(sra16_u) + BUILTIN_ID_WITH_V(srl8) + BUILTIN_ID_WITH_V(srl8_u) + BUILTIN_ID_WITH_V(srl16) + BUILTIN_ID_WITH_V(srl16_u) + BUILTIN_ID_WITH_V(uclip8) + BUILTIN_ID_WITH_V(uclip16) + BUILTIN_ID_WITH_V(uclip32) + BUILTIN_ID_WITH_V(kdmabb) + BUILTIN_ID_WITH_V(kdmabt) + BUILTIN_ID_WITH_V(kdmatt) + BUILTIN_ID_WITH_V(kmabb) + BUILTIN_ID_WITH_V(kmabt) + BUILTIN_ID_WITH_V(kmatt) + BUILTIN_ID_WITH_V(kmada) + BUILTIN_ID_WITH_V(kmaxda) + BUILTIN_ID_WITH_V(kmads) + BUILTIN_ID_WITH_V(kmadrs) + BUILTIN_ID_WITH_V(kmaxds) + BUILTIN_ID_WITH_V(kmsda) + BUILTIN_ID_WITH_V(kmsxda) + BUILTIN_ID_WITH_V(pbsada) + BUILTIN_ID_WITH_V(smaqa) + BUILTIN_ID_WITH_V(smaqa_su) + BUILTIN_ID_WITH_V(umaqa) + } + + IntrinsicTypes = {ConvertType(E->getType()), Ops[1]->getType()}; + break; + } + + // Intrinsic type is obtained from ResultType and Ops[2]. + case RISCV::BI__rv__kmmawb: + case RISCV::BI__rv__v_kmmawb: + case RISCV::BI__rv__kmmawb_u: + case RISCV::BI__rv__v_kmmawb_u: + case RISCV::BI__rv__kmmawb2: + case RISCV::BI__rv__v_kmmawb2: + case RISCV::BI__rv__kmmawb2_u: + case RISCV::BI__rv__v_kmmawb2_u: + case RISCV::BI__rv__kmmawt: + case RISCV::BI__rv__v_kmmawt: + case RISCV::BI__rv__kmmawt_u: + case RISCV::BI__rv__v_kmmawt_u: + case RISCV::BI__rv__kmmawt2: + case RISCV::BI__rv__v_kmmawt2: + case RISCV::BI__rv__kmmawt2_u: + case RISCV::BI__rv__v_kmmawt2_u: { + switch (BuiltinID) { + default: llvm_unreachable("unexpected builtin ID"); + BUILTIN_ID_WITH_V(kmmawb) + BUILTIN_ID_WITH_V(kmmawb_u) + BUILTIN_ID_WITH_V(kmmawb2) + BUILTIN_ID_WITH_V(kmmawb2_u) + BUILTIN_ID_WITH_V(kmmawt) + BUILTIN_ID_WITH_V(kmmawt_u) + BUILTIN_ID_WITH_V(kmmawt2) + BUILTIN_ID_WITH_V(kmmawt2_u) + } + + IntrinsicTypes = {ResultType, Ops[2]->getType()}; + break; + } +#undef BUILTIN_ID +#undef BUILTIN_ID_WITH_V +#undef BUILTIN_ID_WITH_USV + // Vector builtins are handled from here. 
#include "clang/Basic/riscv_vector_builtin_cg.inc" } diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/rv32p.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/rv32p.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/rv32p.c @@ -0,0 +1,3583 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv32 -O2 -target-feature +experimental-p \ +// RUN: -emit-llvm %s -o - | FileCheck --check-prefix=CHECK-RV32 %s + +typedef signed char int8x4_t __attribute((vector_size(4))); +typedef short int16x2_t __attribute((vector_size(4))); +typedef unsigned char uint8x4_t __attribute__((vector_size(4))); +typedef unsigned short uint16x2_t __attribute__((vector_size(4))); + +// CHECK-RV32-LABEL: @add8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.add8.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long add8(unsigned long a, unsigned long b) { + return __rv__add8(a, b); +} + +// CHECK-RV32-LABEL: @v_uadd8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <4 x i8> @llvm.riscv.add8.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint8x4_t v_uadd8(uint8x4_t a, uint8x4_t b) { + return __rv__v_uadd8(a, b); +} + +// CHECK-RV32-LABEL: @v_sadd8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <4 x i8> @llvm.riscv.add8.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +int8x4_t v_sadd8(int8x4_t a, int8x4_t b) { + return __rv__v_sadd8(a, b); +} + +// CHECK-RV32-LABEL: @add16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.add16.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long add16(unsigned long a, unsigned long b) { + return __rv__add16(a, b); +} + +// CHECK-RV32-LABEL: @v_uadd16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.add16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint16x2_t v_uadd16(uint16x2_t a, uint16x2_t b) { + return __rv__v_uadd16(a, b); +} + +// CHECK-RV32-LABEL: @v_sadd16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.add16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +int16x2_t v_sadd16(int16x2_t a, int16x2_t b) { + return __rv__v_sadd16(a, b); +} + +// CHECK-RV32-LABEL: @ave( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: 
[[TMP0:%.*]] = tail call i32 @llvm.riscv.ave.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long ave(long a, long b) { + return __rv__ave(a, b); +} + +// CHECK-RV32-LABEL: @bitrev( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.bitrev.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long bitrev(unsigned long a, unsigned long b) { + return __rv__bitrev(a, b); +} + +// CHECK-RV32-LABEL: @bpick( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.bpick.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long bpick(unsigned long a, unsigned long b, unsigned long c) { + return __rv__bpick(a, b, c); +} + +// CHECK-RV32-LABEL: @clrs8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.clrs8.i32(i32 [[A:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long clrs8(unsigned long a) { + return __rv__clrs8(a); +} + +// CHECK-RV32-LABEL: @v_clrs8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call <4 x i8> @llvm.riscv.clrs8.v4i8(<4 x i8> [[TMP0]]) +// CHECK-RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[TMP1]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +uint8x4_t v_clrs8(int8x4_t a) { + return __rv__v_clrs8(a); +} + +// CHECK-RV32-LABEL: @clrs16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.clrs16.i32(i32 [[A:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long clrs16(unsigned long a) { + return __rv__clrs16(a); +} + +// CHECK-RV32-LABEL: @v_clrs16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call <2 x i16> @llvm.riscv.clrs16.v2i16(<2 x i16> [[TMP0]]) +// CHECK-RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[TMP1]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +uint16x2_t v_clrs16(int16x2_t a) { + return __rv__v_clrs16(a); +} + +// CHECK-RV32-LABEL: @clrs32( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.clrs32.i32(i32 [[A:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long clrs32(unsigned long a) { + return __rv__clrs32(a); +} + +// CHECK-RV32-LABEL: @clo8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.clo8.i32(i32 [[A:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long clo8(unsigned long a) { + return __rv__clo8(a); +} + +// CHECK-RV32-LABEL: @v_clo8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call <4 x i8> @llvm.riscv.clo8.v4i8(<4 x i8> [[TMP0]]) +// CHECK-RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[TMP1]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +uint8x4_t v_clo8(uint8x4_t a) { + return __rv__v_clo8(a); +} + +// CHECK-RV32-LABEL: @clo16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.clo16.i32(i32 [[A:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long clo16(unsigned long a) { + return __rv__clo16(a); +} + +// CHECK-RV32-LABEL: @v_clo16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call <2 x i16> @llvm.riscv.clo16.v2i16(<2 x 
i16> [[TMP0]]) +// CHECK-RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[TMP1]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +uint16x2_t v_clo16(uint16x2_t a) { + return __rv__v_clo16(a); +} + +// CHECK-RV32-LABEL: @clo32( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.clo32.i32(i32 [[A:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long clo32(unsigned long a) { + return __rv__clo32(a); +} + +// CHECK-RV32-LABEL: @clz8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.clz8.i32(i32 [[A:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long clz8(unsigned long a) { + return __rv__clz8(a); +} + +// CHECK-RV32-LABEL: @v_clz8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call <4 x i8> @llvm.riscv.clz8.v4i8(<4 x i8> [[TMP0]]) +// CHECK-RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[TMP1]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +uint8x4_t v_clz8(uint8x4_t a) { + return __rv__v_clz8(a); +} + +// CHECK-RV32-LABEL: @clz16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.clz16.i32(i32 [[A:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long clz16(unsigned long a) { + return __rv__clz16(a); +} + +// CHECK-RV32-LABEL: @v_clz16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call <2 x i16> @llvm.riscv.clz16.v2i16(<2 x i16> [[TMP0]]) +// CHECK-RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[TMP1]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +uint16x2_t v_clz16(uint16x2_t a) { + return __rv__v_clz16(a); +} + +// CHECK-RV32-LABEL: @clz32( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.clz32.i32(i32 [[A:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long clz32(unsigned long a) { + return __rv__clz32(a); +} + +// CHECK-RV32-LABEL: @cmpeq8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.cmpeq8.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long cmpeq8(unsigned long a, unsigned long b) { + return __rv__cmpeq8(a, b); +} + +// CHECK-RV32-LABEL: @v_scmpeq8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <4 x i8> @llvm.riscv.cmpeq8.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint8x4_t v_scmpeq8(int8x4_t a, int8x4_t b) { + return __rv__v_scmpeq8(a, b); +} + +// CHECK-RV32-LABEL: @v_ucmpeq8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <4 x i8> @llvm.riscv.cmpeq8.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint8x4_t v_ucmpeq8(uint8x4_t a, uint8x4_t b) { + return __rv__v_ucmpeq8(a, b); +} + +// CHECK-RV32-LABEL: @cmpeq16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.cmpeq16.i32(i32 [[A:%.*]], i32 
[[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long cmpeq16(unsigned long a, unsigned long b) { + return __rv__cmpeq16(a, b); +} + +// CHECK-RV32-LABEL: @v_scmpeq16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.cmpeq16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint16x2_t v_scmpeq16(int16x2_t a, int16x2_t b) { + return __rv__v_scmpeq16(a, b); +} + +// CHECK-RV32-LABEL: @v_ucmpeq16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.cmpeq16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint16x2_t v_ucmpeq16(uint16x2_t a, uint16x2_t b) { + return __rv__v_ucmpeq16(a, b); +} + +// CHECK-RV32-LABEL: @cras16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.cras16.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long cras16(unsigned long a, unsigned long b) { + return __rv__cras16(a, b); +} + +// CHECK-RV32-LABEL: @v_ucras16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.cras16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint16x2_t v_ucras16(uint16x2_t a, uint16x2_t b) { + return __rv__v_ucras16(a, b); +} + +// CHECK-RV32-LABEL: @v_scras16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.cras16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint16x2_t v_scras16(int16x2_t a, int16x2_t b) { + return __rv__v_scras16(a, b); +} + +// CHECK-RV32-LABEL: @crsa16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.crsa16.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long crsa16(unsigned long a, unsigned long b) { + return __rv__crsa16(a, b); +} + +// CHECK-RV32-LABEL: @v_ucrsa16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.crsa16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint16x2_t v_ucrsa16(uint16x2_t a, uint16x2_t b) { + return __rv__v_ucrsa16(a, b); +} + +// CHECK-RV32-LABEL: @v_scrsa16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 
[[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.crsa16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint16x2_t v_scrsa16(int16x2_t a, int16x2_t b) { + return __rv__v_scrsa16(a, b); +} + +// CHECK-RV32-LABEL: @insb( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.insb.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 3) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long insb(unsigned long a, unsigned long b) { + return __rv__insb(a, b, 3); +} + +// CHECK-RV32-LABEL: @kabs8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kabs8.i32(i32 [[A:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long kabs8(unsigned long a) { + return __rv__kabs8(a); +} + +// CHECK-RV32-LABEL: @v_kabs8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call <4 x i8> @llvm.riscv.kabs8.v4i8(<4 x i8> [[TMP0]]) +// CHECK-RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[TMP1]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +int8x4_t v_kabs8(int8x4_t a) { + return __rv__v_kabs8(a); +} + +// CHECK-RV32-LABEL: @kabs16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kabs16.i32(i32 [[A:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long kabs16(unsigned long a) { + return __rv__kabs16(a); +} + +// CHECK-RV32-LABEL: @v_kabs16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call <2 x i16> @llvm.riscv.kabs16.v2i16(<2 x i16> [[TMP0]]) +// CHECK-RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[TMP1]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +int16x2_t v_kabs16(int16x2_t a) { + return __rv__v_kabs16(a); +} + +// CHECK-RV32-LABEL: @kabsw( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kabsw.i32(i32 [[A:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long kabsw(long a) { + return __rv__kabsw(a); +} + +// CHECK-RV32-LABEL: @kadd8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kadd8.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long kadd8(unsigned long a, unsigned long b) { + return __rv__kadd8(a, b); +} + +// CHECK-RV32-LABEL: @v_kadd8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <4 x i8> @llvm.riscv.kadd8.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +int8x4_t v_kadd8(int8x4_t a, int8x4_t b) { + return __rv__v_kadd8(a, b); +} + +// CHECK-RV32-LABEL: @kadd16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kadd16.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long kadd16(unsigned long a, unsigned long b) { + return __rv__kadd16(a, b); +} + +// CHECK-RV32-LABEL: @v_kadd16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x 
i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.kadd16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +int16x2_t v_kadd16(int16x2_t a, int16x2_t b) { + return __rv__v_kadd16(a, b); +} + +// CHECK-RV32-LABEL: @kaddh( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kaddh.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long kaddh(int a, int b) { + return __rv__kaddh(a, b); +} + +// CHECK-RV32-LABEL: @kaddw( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kaddw.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long kaddw(int a, int b) { + return __rv__kaddw(a, b); +} + +// CHECK-RV32-LABEL: @kcras16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kcras16.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long kcras16(unsigned long a, unsigned long b) { + return __rv__kcras16(a, b); +} + +// CHECK-RV32-LABEL: @v_kcras16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.kcras16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +int16x2_t v_kcras16(int16x2_t a, int16x2_t b) { + return __rv__v_kcras16(a, b); +} + +// CHECK-RV32-LABEL: @kcrsa16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kcrsa16.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long kcrsa16(unsigned long a, unsigned long b) { + return __rv__kcrsa16(a, b); +} + +// CHECK-RV32-LABEL: @v_kcrsa16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.kcrsa16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +int16x2_t v_kcrsa16(int16x2_t a, int16x2_t b) { + return __rv__v_kcrsa16(a, b); +} + +// CHECK-RV32-LABEL: @kdmbb( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kdmbb.i32.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long kdmbb(unsigned int a, unsigned int b) { + return __rv__kdmbb(a, b); +} + +// CHECK-RV32-LABEL: @v_kdmbb( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.riscv.kdmbb.i32.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +long v_kdmbb(int16x2_t a, int16x2_t b) { + return __rv__v_kdmbb(a, b); +} + +// CHECK-RV32-LABEL: @kdmbt( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kdmbt.i32.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long 
kdmbt(unsigned int a, unsigned int b) { + return __rv__kdmbt(a, b); +} + +// CHECK-RV32-LABEL: @v_kdmbt( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.riscv.kdmbt.i32.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +long v_kdmbt(int16x2_t a, int16x2_t b) { + return __rv__v_kdmbt(a, b); +} + +// CHECK-RV32-LABEL: @kdmtt( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kdmtt.i32.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long kdmtt(unsigned int a, unsigned int b) { + return __rv__kdmtt(a, b); +} + +// CHECK-RV32-LABEL: @v_kdmtt( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.riscv.kdmtt.i32.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +long v_kdmtt(int16x2_t a, int16x2_t b) { + return __rv__v_kdmtt(a, b); +} + +// CHECK-RV32-LABEL: @kdmabb( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kdmabb.i32.i32(i32 [[T:%.*]], i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long kdmabb(long t, unsigned int a, unsigned int b) { + return __rv__kdmabb(t, a, b); +} + +// CHECK-RV32-LABEL: @v_kdmabb( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.riscv.kdmabb.i32.v2i16(i32 [[T:%.*]], <2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +long v_kdmabb(long t, int16x2_t a, int16x2_t b) { + return __rv__v_kdmabb(t, a, b); +} + +// CHECK-RV32-LABEL: @kdmabt( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kdmabt.i32.i32(i32 [[T:%.*]], i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long kdmabt(long t, unsigned int a, unsigned int b) { + return __rv__kdmabt(t, a, b); +} + +// CHECK-RV32-LABEL: @v_kdmabt( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.riscv.kdmabt.i32.v2i16(i32 [[T:%.*]], <2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +long v_kdmabt(long t, int16x2_t a, int16x2_t b) { + return __rv__v_kdmabt(t, a, b); +} + +// CHECK-RV32-LABEL: @kdmatt( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kdmatt.i32.i32(i32 [[T:%.*]], i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long kdmatt(long t, unsigned int a, unsigned int b) { + return __rv__kdmatt(t, a, b); +} + +// CHECK-RV32-LABEL: @v_kdmatt( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.riscv.kdmatt.i32.v2i16(i32 [[T:%.*]], <2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: ret i32 
[[TMP2]] +// +long v_kdmatt(long t, int16x2_t a, int16x2_t b) { + return __rv__v_kdmatt(t, a, b); +} + +// CHECK-RV32-LABEL: @khm8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.khm8.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long khm8(unsigned long a, unsigned long b) { + return __rv__khm8(a, b); +} + +// CHECK-RV32-LABEL: @v_khm8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <4 x i8> @llvm.riscv.khm8.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +int8x4_t v_khm8(int8x4_t a, int8x4_t b) { + return __rv__v_khm8(a, b); +} + +// CHECK-RV32-LABEL: @khmx8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.khmx8.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long khmx8(unsigned long a, unsigned long b) { + return __rv__khmx8(a, b); +} + +// CHECK-RV32-LABEL: @v_khmx8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <4 x i8> @llvm.riscv.khmx8.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +int8x4_t v_khmx8(int8x4_t a, int8x4_t b) { + return __rv__v_khmx8(a, b); +} + +// CHECK-RV32-LABEL: @khm16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.khm16.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long khm16(unsigned long a, unsigned long b) { + return __rv__khm16(a, b); +} + +// CHECK-RV32-LABEL: @v_khm16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.khm16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +int16x2_t v_khm16(int16x2_t a, int16x2_t b) { + return __rv__v_khm16(a, b); +} + +// CHECK-RV32-LABEL: @khmx16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.khmx16.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long khmx16(unsigned long a, unsigned long b) { + return __rv__khmx16(a, b); +} + +// CHECK-RV32-LABEL: @v_khmx16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.khmx16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +int16x2_t v_khmx16(int16x2_t a, int16x2_t b) { + return __rv__v_khmx16(a, b); +} + +// CHECK-RV32-LABEL: @khmbb( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.khmbb.i32.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// 
CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long khmbb(unsigned int a, unsigned int b) { + return __rv__khmbb(a, b); +} + +// CHECK-RV32-LABEL: @v_khmbb( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.riscv.khmbb.i32.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +long v_khmbb(int16x2_t a, int16x2_t b) { + return __rv__v_khmbb(a, b); +} + +// CHECK-RV32-LABEL: @khmbt( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.khmbt.i32.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long khmbt(unsigned int a, unsigned int b) { + return __rv__khmbt(a, b); +} + +// CHECK-RV32-LABEL: @v_khmbt( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.riscv.khmbt.i32.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +long v_khmbt(int16x2_t a, int16x2_t b) { + return __rv__v_khmbt(a, b); +} + +// CHECK-RV32-LABEL: @khmtt( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.khmtt.i32.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long khmtt(unsigned int a, unsigned int b) { + return __rv__khmtt(a, b); +} + +// CHECK-RV32-LABEL: @v_khmtt( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.riscv.khmtt.i32.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +long v_khmtt(int16x2_t a, int16x2_t b) { + return __rv__v_khmtt(a, b); +} + +// CHECK-RV32-LABEL: @kmabb( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kmabb.i32.i32(i32 [[T:%.*]], i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long kmabb(long t, unsigned long a, unsigned long b) { + return __rv__kmabb(t, a, b); +} + +// CHECK-RV32-LABEL: @v_kmabb( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.riscv.kmabb.i32.v2i16(i32 [[T:%.*]], <2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +long v_kmabb(long t, int16x2_t a, int16x2_t b) { + return __rv__v_kmabb(t, a, b); +} + +// CHECK-RV32-LABEL: @kmabt( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kmabt.i32.i32(i32 [[T:%.*]], i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long kmabt(long t, unsigned long a, unsigned long b) { + return __rv__kmabt(t, a, b); +} + +// CHECK-RV32-LABEL: @v_kmabt( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.riscv.kmabt.i32.v2i16(i32 [[T:%.*]], <2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +long 
v_kmabt(long t, int16x2_t a, int16x2_t b) { + return __rv__v_kmabt(t, a, b); +} + +// CHECK-RV32-LABEL: @kmatt( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kmatt.i32.i32(i32 [[T:%.*]], i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long kmatt(long t, unsigned long a, unsigned long b) { + return __rv__kmatt(t, a, b); +} + +// CHECK-RV32-LABEL: @v_kmatt( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.riscv.kmatt.i32.v2i16(i32 [[T:%.*]], <2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +long v_kmatt(long t, int16x2_t a, int16x2_t b) { + return __rv__v_kmatt(t, a, b); +} + +// CHECK-RV32-LABEL: @kmada( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kmada.i32.i32(i32 [[T:%.*]], i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long kmada(long t, unsigned long a, unsigned long b) { + return __rv__kmada(t, a, b); +} + +// CHECK-RV32-LABEL: @v_kmada( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.riscv.kmada.i32.v2i16(i32 [[T:%.*]], <2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +long v_kmada(long t, int16x2_t a, int16x2_t b) { + return __rv__v_kmada(t, a, b); +} + +// CHECK-RV32-LABEL: @kmaxda( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kmaxda.i32.i32(i32 [[T:%.*]], i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long kmaxda(long t, unsigned long a, unsigned long b) { + return __rv__kmaxda(t, a, b); +} + +// CHECK-RV32-LABEL: @v_kmaxda( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.riscv.kmaxda.i32.v2i16(i32 [[T:%.*]], <2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +long v_kmaxda(long t, int16x2_t a, int16x2_t b) { + return __rv__v_kmaxda(t, a, b); +} + +// CHECK-RV32-LABEL: @kmads( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kmads.i32.i32(i32 [[T:%.*]], i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long kmads(long t, unsigned long a, unsigned long b) { + return __rv__kmads(t, a, b); +} + +// CHECK-RV32-LABEL: @v_kmads( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.riscv.kmads.i32.v2i16(i32 [[T:%.*]], <2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +long v_kmads(long t, int16x2_t a, int16x2_t b) { + return __rv__v_kmads(t, a, b); +} + +// CHECK-RV32-LABEL: @kmadrs( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kmadrs.i32.i32(i32 [[T:%.*]], i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long kmadrs(long t, unsigned long a, unsigned long b) { + return 
__rv__kmadrs(t, a, b); +} + +// CHECK-RV32-LABEL: @v_kmadrs( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.riscv.kmadrs.i32.v2i16(i32 [[T:%.*]], <2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +long v_kmadrs(long t, int16x2_t a, int16x2_t b) { + return __rv__v_kmadrs(t, a, b); +} + +// CHECK-RV32-LABEL: @kmaxds( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kmaxds.i32.i32(i32 [[T:%.*]], i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long kmaxds(long t, unsigned long a, unsigned long b) { + return __rv__kmaxds(t, a, b); +} + +// CHECK-RV32-LABEL: @v_kmaxds( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.riscv.kmaxds.i32.v2i16(i32 [[T:%.*]], <2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +long v_kmaxds(long t, int16x2_t a, int16x2_t b) { + return __rv__v_kmaxds(t, a, b); +} + +// CHECK-RV32-LABEL: @kmda( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kmda.i32.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long kmda(unsigned long a, unsigned long b) { + return __rv__kmda(a, b); +} + +// CHECK-RV32-LABEL: @v_kmda( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.riscv.kmda.i32.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +long v_kmda(int16x2_t a, int16x2_t b) { + return __rv__v_kmda(a, b); +} + +// CHECK-RV32-LABEL: @kmxda( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kmxda.i32.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long kmxda(unsigned long a, unsigned long b) { + return __rv__kmxda(a, b); +} + +// CHECK-RV32-LABEL: @v_kmxda( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.riscv.kmxda.i32.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +long v_kmxda(int16x2_t a, int16x2_t b) { + return __rv__v_kmxda(a, b); +} + +// CHECK-RV32-LABEL: @kmmac( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kmmac.i32(i32 [[T:%.*]], i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long kmmac(long t, long a, long b) { + return __rv__kmmac(t, a, b); +} + +// CHECK-RV32-LABEL: @kmmac_u( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kmmac.u.i32(i32 [[T:%.*]], i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long kmmac_u(long t, long a, long b) { + return __rv__kmmac_u(t, a, b); +} + +// CHECK-RV32-LABEL: @kmmawb( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kmmawb.i32.i32(i32 [[T:%.*]], i32 [[A:%.*]], i32 [[B:%.*]]) 
+// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long kmmawb(long t, unsigned long a, unsigned long b) { + return __rv__kmmawb(t, a, b); +} + +// CHECK-RV32-LABEL: @v_kmmawb( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.riscv.kmmawb.i32.v2i16(i32 [[T:%.*]], i32 [[A:%.*]], <2 x i16> [[TMP0]]) +// CHECK-RV32-NEXT: ret i32 [[TMP1]] +// +int v_kmmawb(int t, int a, int16x2_t b) { + return __rv__v_kmmawb(t, a, b); +} + +// CHECK-RV32-LABEL: @kmmawb_u( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kmmawb.u.i32.i32(i32 [[T:%.*]], i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long kmmawb_u(long t, unsigned long a, unsigned long b) { + return __rv__kmmawb_u(t, a, b); +} + +// CHECK-RV32-LABEL: @v_kmmawb_u( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.riscv.kmmawb.u.i32.v2i16(i32 [[T:%.*]], i32 [[A:%.*]], <2 x i16> [[TMP0]]) +// CHECK-RV32-NEXT: ret i32 [[TMP1]] +// +int v_kmmawb_u(int t, int a, int16x2_t b) { + return __rv__v_kmmawb_u(t, a, b); +} + +// CHECK-RV32-LABEL: @kmmawb2( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kmmawb2.i32.i32(i32 [[T:%.*]], i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long kmmawb2(long t, unsigned long a, unsigned long b) { + return __rv__kmmawb2(t, a, b); +} + +// CHECK-RV32-LABEL: @v_kmmawb2( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.riscv.kmmawb2.i32.v2i16(i32 [[T:%.*]], i32 [[A:%.*]], <2 x i16> [[TMP0]]) +// CHECK-RV32-NEXT: ret i32 [[TMP1]] +// +int v_kmmawb2(int t, int a, int16x2_t b) { + return __rv__v_kmmawb2(t, a, b); +} + +// CHECK-RV32-LABEL: @kmmawb2_u( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kmmawb2.u.i32.i32(i32 [[T:%.*]], i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long kmmawb2_u(long t, unsigned long a, unsigned long b) { + return __rv__kmmawb2_u(t, a, b); +} + +// CHECK-RV32-LABEL: @v_kmmawb2_u( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.riscv.kmmawb2.u.i32.v2i16(i32 [[T:%.*]], i32 [[A:%.*]], <2 x i16> [[TMP0]]) +// CHECK-RV32-NEXT: ret i32 [[TMP1]] +// +int v_kmmawb2_u(int t, int a, int16x2_t b) { + return __rv__v_kmmawb2_u(t, a, b); +} + +// CHECK-RV32-LABEL: @kmmawt( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kmmawt.i32.i32(i32 [[T:%.*]], i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long kmmawt(long t, unsigned long a, unsigned long b) { + return __rv__kmmawt(t, a, b); +} + +// CHECK-RV32-LABEL: @v_kmmawt( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.riscv.kmmawt.i32.v2i16(i32 [[T:%.*]], i32 [[A:%.*]], <2 x i16> [[TMP0]]) +// CHECK-RV32-NEXT: ret i32 [[TMP1]] +// +int v_kmmawt(int t, int a, int16x2_t b) { + return __rv__v_kmmawt(t, a, b); +} + +// CHECK-RV32-LABEL: @kmmawt_u( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 
@llvm.riscv.kmmawt.u.i32.i32(i32 [[T:%.*]], i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long kmmawt_u(long t, unsigned long a, unsigned long b) { + return __rv__kmmawt_u(t, a, b); +} + +// CHECK-RV32-LABEL: @v_kmmawt_u( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.riscv.kmmawt.u.i32.v2i16(i32 [[T:%.*]], i32 [[A:%.*]], <2 x i16> [[TMP0]]) +// CHECK-RV32-NEXT: ret i32 [[TMP1]] +// +int v_kmmawt_u(int t, int a, int16x2_t b) { + return __rv__v_kmmawt_u(t, a, b); +} + +// CHECK-RV32-LABEL: @kmmawt2( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kmmawt2.i32.i32(i32 [[T:%.*]], i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long kmmawt2(long t, unsigned long a, unsigned long b) { + return __rv__kmmawt2(t, a, b); +} + +// CHECK-RV32-LABEL: @v_kmmawt2( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.riscv.kmmawt2.i32.v2i16(i32 [[T:%.*]], i32 [[A:%.*]], <2 x i16> [[TMP0]]) +// CHECK-RV32-NEXT: ret i32 [[TMP1]] +// +int v_kmmawt2(int t, int a, int16x2_t b) { + return __rv__v_kmmawt2(t, a, b); +} + +// CHECK-RV32-LABEL: @kmmawt2_u( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kmmawt2.u.i32.i32(i32 [[T:%.*]], i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long kmmawt2_u(long t, unsigned long a, unsigned long b) { + return __rv__kmmawt2_u(t, a, b); +} + +// CHECK-RV32-LABEL: @v_kmmawt2_u( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.riscv.kmmawt2.u.i32.v2i16(i32 [[T:%.*]], i32 [[A:%.*]], <2 x i16> [[TMP0]]) +// CHECK-RV32-NEXT: ret i32 [[TMP1]] +// +int v_kmmawt2_u(int t, int a, int16x2_t b) { + return __rv__v_kmmawt2_u(t, a, b); +} + +// CHECK-RV32-LABEL: @kmmsb( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kmmsb.i32(i32 [[T:%.*]], i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long kmmsb(long t, long a, long b) { + return __rv__kmmsb(t, a, b); +} + +// CHECK-RV32-LABEL: @kmmsb_u( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kmmsb.u.i32(i32 [[T:%.*]], i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long kmmsb_u(long t, long a, long b) { + return __rv__kmmsb_u(t, a, b); +} + +// CHECK-RV32-LABEL: @kmmwb2( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kmmwb2.i32.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long kmmwb2(long a, unsigned long b) { + return __rv__kmmwb2(a, b); +} + +// CHECK-RV32-LABEL: @v_kmmwb2( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.riscv.kmmwb2.i32.v2i16(i32 [[A:%.*]], <2 x i16> [[TMP0]]) +// CHECK-RV32-NEXT: ret i32 [[TMP1]] +// +int v_kmmwb2(int a, int16x2_t b) { + return __rv__v_kmmwb2(a, b); +} + +// CHECK-RV32-LABEL: @kmmwb2_u( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kmmwb2.u.i32.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long kmmwb2_u(long a, unsigned long b) 
{
+ return __rv__kmmwb2_u(a, b);
+}
+
+// CHECK-RV32-LABEL: @v_kmmwb2_u(
+// CHECK-RV32-NEXT: entry:
+// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16>
+// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.riscv.kmmwb2.u.i32.v2i16(i32 [[A:%.*]], <2 x i16> [[TMP0]])
+// CHECK-RV32-NEXT: ret i32 [[TMP1]]
+//
+int v_kmmwb2_u(int a, int16x2_t b) {
+ return __rv__v_kmmwb2_u(a, b);
+}
+
+// CHECK-RV32-LABEL: @kmmwt2(
+// CHECK-RV32-NEXT: entry:
+// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kmmwt2.i32.i32(i32 [[A:%.*]], i32 [[B:%.*]])
+// CHECK-RV32-NEXT: ret i32 [[TMP0]]
+//
+long kmmwt2(long a, unsigned long b) {
+ return __rv__kmmwt2(a, b);
+}
+
+// CHECK-RV32-LABEL: @v_kmmwt2(
+// CHECK-RV32-NEXT: entry:
+// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16>
+// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.riscv.kmmwt2.i32.v2i16(i32 [[A:%.*]], <2 x i16> [[TMP0]])
+// CHECK-RV32-NEXT: ret i32 [[TMP1]]
+//
+int v_kmmwt2(int a, int16x2_t b) {
+ return __rv__v_kmmwt2(a, b);
+}
+
+// CHECK-RV32-LABEL: @kmmwt2_u(
+// CHECK-RV32-NEXT: entry:
+// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kmmwt2.u.i32.i32(i32 [[A:%.*]], i32 [[B:%.*]])
+// CHECK-RV32-NEXT: ret i32 [[TMP0]]
+//
+long kmmwt2_u(long a, unsigned long b) {
+ return __rv__kmmwt2_u(a, b);
+}
+
+// CHECK-RV32-LABEL: @v_kmmwt2_u(
+// CHECK-RV32-NEXT: entry:
+// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16>
+// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.riscv.kmmwt2.u.i32.v2i16(i32 [[A:%.*]], <2 x i16> [[TMP0]])
+// CHECK-RV32-NEXT: ret i32 [[TMP1]]
+//
+int v_kmmwt2_u(int a, int16x2_t b) {
+ return __rv__v_kmmwt2_u(a, b);
+}
+
+// CHECK-RV32-LABEL: @kmsda(
+// CHECK-RV32-NEXT: entry:
+// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kmsda.i32.i32(i32 [[T:%.*]], i32 [[A:%.*]], i32 [[B:%.*]])
+// CHECK-RV32-NEXT: ret i32 [[TMP0]]
+//
+long kmsda(long t, unsigned long a, unsigned long b) {
+ return __rv__kmsda(t, a, b);
+}
+
+// CHECK-RV32-LABEL: @v_kmsda(
+// CHECK-RV32-NEXT: entry:
+// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16>
+// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16>
+// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.riscv.kmsda.i32.v2i16(i32 [[T:%.*]], <2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// CHECK-RV32-NEXT: ret i32 [[TMP2]]
+//
+int v_kmsda(int t, int16x2_t a, int16x2_t b) {
+ return __rv__v_kmsda(t, a, b);
+}
+
+// CHECK-RV32-LABEL: @kmsxda(
+// CHECK-RV32-NEXT: entry:
+// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kmsxda.i32.i32(i32 [[T:%.*]], i32 [[A:%.*]], i32 [[B:%.*]])
+// CHECK-RV32-NEXT: ret i32 [[TMP0]]
+//
+long kmsxda(long t, unsigned long a, unsigned long b) {
+ return __rv__kmsxda(t, a, b);
+}
+
+// CHECK-RV32-LABEL: @v_kmsxda(
+// CHECK-RV32-NEXT: entry:
+// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16>
+// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16>
+// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.riscv.kmsxda.i32.v2i16(i32 [[T:%.*]], <2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// CHECK-RV32-NEXT: ret i32 [[TMP2]]
+//
+int v_kmsxda(int t, int16x2_t a, int16x2_t b) {
+ return __rv__v_kmsxda(t, a, b);
+}
+
+// CHECK-RV32-LABEL: @ksllw(
+// CHECK-RV32-NEXT: entry:
+// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.ksllw.i32(i32 [[A:%.*]], i32 [[B:%.*]])
+// CHECK-RV32-NEXT: ret i32 [[TMP0]]
+//
+long ksllw(long a, unsigned int b) {
+ return __rv__ksllw(a, b); +} + +// CHECK-RV32-LABEL: @ksll8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.ksll8.i32.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long ksll8(unsigned long a, unsigned int b) { + return __rv__ksll8(a, b); +} + +// CHECK-RV32-LABEL: @v_ksll8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call <4 x i8> @llvm.riscv.ksll8.v4i8.i32(<4 x i8> [[TMP0]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[TMP1]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +int8x4_t v_ksll8(int8x4_t a, unsigned int b) { + return __rv__v_ksll8(a, b); +} + +// CHECK-RV32-LABEL: @ksll16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.ksll16.i32.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long ksll16(unsigned long a, unsigned int b) { + return __rv__ksll16(a, b); +} + +// CHECK-RV32-LABEL: @v_ksll16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call <2 x i16> @llvm.riscv.ksll16.v2i16.i32(<2 x i16> [[TMP0]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[TMP1]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +int16x2_t v_ksll16(int16x2_t a, unsigned int b) { + return __rv__v_ksll16(a, b); +} + +// CHECK-RV32-LABEL: @kslra8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kslra8.i32.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long kslra8(unsigned long a, int b) { + return __rv__kslra8(a, b); +} + +// CHECK-RV32-LABEL: @v_kslra8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call <4 x i8> @llvm.riscv.kslra8.v4i8.i32(<4 x i8> [[TMP0]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[TMP1]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +int8x4_t v_kslra8(int8x4_t a, int b) { + return __rv__v_kslra8(a, b); +} + +// CHECK-RV32-LABEL: @kslra8_u( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kslra8.u.i32.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long kslra8_u(unsigned long a, int b) { + return __rv__kslra8_u(a, b); +} + +// CHECK-RV32-LABEL: @v_kslra8_u( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call <4 x i8> @llvm.riscv.kslra8.u.v4i8.i32(<4 x i8> [[TMP0]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[TMP1]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +int8x4_t v_kslra8_u(int8x4_t a, int b) { + return __rv__v_kslra8_u(a, b); +} + +// CHECK-RV32-LABEL: @kslra16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kslra16.i32.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long kslra16(unsigned long a, int b) { + return __rv__kslra16(a, b); +} + +// CHECK-RV32-LABEL: @v_kslra16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call <2 x i16> @llvm.riscv.kslra16.v2i16.i32(<2 x i16> [[TMP0]], 
i32 [[B:%.*]])
+// CHECK-RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[TMP1]] to i32
+// CHECK-RV32-NEXT: ret i32 [[TMP2]]
+//
+int16x2_t v_kslra16(int16x2_t a, int b) {
+ return __rv__v_kslra16(a, b);
+}
+
+// CHECK-RV32-LABEL: @kslra16_u(
+// CHECK-RV32-NEXT: entry:
+// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kslra16.u.i32.i32(i32 [[A:%.*]], i32 [[B:%.*]])
+// CHECK-RV32-NEXT: ret i32 [[TMP0]]
+//
+unsigned long kslra16_u(unsigned long a, int b) {
+ return __rv__kslra16_u(a, b);
+}
+
+// CHECK-RV32-LABEL: @v_kslra16_u(
+// CHECK-RV32-NEXT: entry:
+// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16>
+// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call <2 x i16> @llvm.riscv.kslra16.u.v2i16.i32(<2 x i16> [[TMP0]], i32 [[B:%.*]])
+// CHECK-RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[TMP1]] to i32
+// CHECK-RV32-NEXT: ret i32 [[TMP2]]
+//
+int16x2_t v_kslra16_u(int16x2_t a, int b) {
+ return __rv__v_kslra16_u(a, b);
+}
+
+// CHECK-RV32-LABEL: @kslraw(
+// CHECK-RV32-NEXT: entry:
+// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kslraw.i32(i32 [[A:%.*]], i32 [[B:%.*]])
+// CHECK-RV32-NEXT: ret i32 [[TMP0]]
+//
+unsigned long kslraw(int a, int b) {
+ return __rv__kslraw(a, b);
+}
+
+// CHECK-RV32-LABEL: @kslraw_u(
+// CHECK-RV32-NEXT: entry:
+// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kslraw.u.i32(i32 [[A:%.*]], i32 [[B:%.*]])
+// CHECK-RV32-NEXT: ret i32 [[TMP0]]
+//
+unsigned long kslraw_u(int a, int b) {
+ return __rv__kslraw_u(a, b);
+}
+
+// CHECK-RV32-LABEL: @kstas16(
+// CHECK-RV32-NEXT: entry:
+// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kstas16.i32(i32 [[A:%.*]], i32 [[B:%.*]])
+// CHECK-RV32-NEXT: ret i32 [[TMP0]]
+//
+unsigned long kstas16(unsigned long a, unsigned long b) {
+ return __rv__kstas16(a, b);
+}
+
+// CHECK-RV32-LABEL: @v_kstas16(
+// CHECK-RV32-NEXT: entry:
+// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16>
+// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16>
+// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.kstas16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32
+// CHECK-RV32-NEXT: ret i32 [[TMP3]]
+//
+int16x2_t v_kstas16(int16x2_t a, int16x2_t b) {
+ return __rv__v_kstas16(a, b);
+}
+
+// CHECK-RV32-LABEL: @kstsa16(
+// CHECK-RV32-NEXT: entry:
+// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kstsa16.i32(i32 [[A:%.*]], i32 [[B:%.*]])
+// CHECK-RV32-NEXT: ret i32 [[TMP0]]
+//
+unsigned long kstsa16(unsigned long a, unsigned long b) {
+ return __rv__kstsa16(a, b);
+}
+
+// CHECK-RV32-LABEL: @v_kstsa16(
+// CHECK-RV32-NEXT: entry:
+// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16>
+// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16>
+// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.kstsa16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32
+// CHECK-RV32-NEXT: ret i32 [[TMP3]]
+//
+int16x2_t v_kstsa16(int16x2_t a, int16x2_t b) {
+ return __rv__v_kstsa16(a, b);
+}
+
+// CHECK-RV32-LABEL: @ksub8(
+// CHECK-RV32-NEXT: entry:
+// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.ksub8.i32(i32 [[A:%.*]], i32 [[B:%.*]])
+// CHECK-RV32-NEXT: ret i32 [[TMP0]]
+//
+unsigned long ksub8(unsigned long a, unsigned long b) {
+ return __rv__ksub8(a, b);
+}
+
+// CHECK-RV32-LABEL: @v_ksub8(
+// CHECK-RV32-NEXT: entry:
+//
CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <4 x i8> @llvm.riscv.ksub8.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +int8x4_t v_ksub8(int8x4_t a, int8x4_t b) { + return __rv__v_ksub8(a, b); +} + +// CHECK-RV32-LABEL: @ksub16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.ksub16.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long ksub16(unsigned long a, unsigned long b) { + return __rv__ksub16(a, b); +} + +// CHECK-RV32-LABEL: @v_ksub16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.ksub16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +int16x2_t v_ksub16(int16x2_t a, int16x2_t b) { + return __rv__v_ksub16(a, b); +} + +// CHECK-RV32-LABEL: @ksubh( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.ksubh.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long ksubh(int a, int b) { + return __rv__ksubh(a, b); +} + +// CHECK-RV32-LABEL: @ksubw( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.ksubw.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long ksubw(int a, int b) { + return __rv__ksubw(a, b); +} + +// CHECK-RV32-LABEL: @kwmmul( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kwmmul.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long kwmmul(long a, long b) { + return __rv__kwmmul(a, b); +} + +// CHECK-RV32-LABEL: @kwmmul_u( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.kwmmul.u.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long kwmmul_u(long a, long b) { + return __rv__kwmmul_u(a, b); +} + +// CHECK-RV32-LABEL: @maxw( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.maxw.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long maxw(int a, int b) { + return __rv__maxw(a, b); +} + +// CHECK-RV32-LABEL: @minw( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.minw.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long minw(int a, int b) { + return __rv__minw(a, b); +} + +// CHECK-RV32-LABEL: @pbsad( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.pbsad.i32.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long pbsad(unsigned long a, unsigned long b) { + return __rv__pbsad(a, b); +} + +// CHECK-RV32-LABEL: @v_pbsad( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.riscv.pbsad.i32.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]]) +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +unsigned 
long v_pbsad(uint8x4_t a, uint8x4_t b) { + return __rv__v_pbsad(a, b); +} + +// CHECK-RV32-LABEL: @pbsada( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.pbsada.i32.i32(i32 [[T:%.*]], i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long pbsada(unsigned long t, unsigned long a, unsigned long b) { + return __rv__pbsada(t, a, b); +} + +// CHECK-RV32-LABEL: @v_pbsada( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.riscv.pbsada.i32.v4i8(i32 [[T:%.*]], <4 x i8> [[TMP0]], <4 x i8> [[TMP1]]) +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +unsigned long v_pbsada(unsigned long t, uint8x4_t a, uint8x4_t b) { + return __rv__v_pbsada(t, a, b); +} + +// CHECK-RV32-LABEL: @pkbb16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.pkbb16.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long pkbb16(unsigned long a, unsigned long b) { + return __rv__pkbb16(a, b); +} + +// CHECK-RV32-LABEL: @v_pkbb16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.pkbb16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint16x2_t v_pkbb16(uint16x2_t a, uint16x2_t b) { + return __rv__v_pkbb16(a, b); +} + +// CHECK-RV32-LABEL: @pkbt16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.pkbt16.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long pkbt16(unsigned long a, unsigned long b) { + return __rv__pkbt16(a, b); +} + +// CHECK-RV32-LABEL: @v_pkbt16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.pkbt16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint16x2_t v_pkbt16(uint16x2_t a, uint16x2_t b) { + return __rv__v_pkbt16(a, b); +} + +// CHECK-RV32-LABEL: @pktb16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.pktb16.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long pktb16(unsigned long a, unsigned long b) { + return __rv__pktb16(a, b); +} + +// CHECK-RV32-LABEL: @v_pktb16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.pktb16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint16x2_t v_pktb16(uint16x2_t a, uint16x2_t b) { + return __rv__v_pktb16(a, b); +} + +// CHECK-RV32-LABEL: @pktt16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.pktt16.i32(i32 [[A:%.*]], 
i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long pktt16(unsigned long a, unsigned long b) { + return __rv__pktt16(a, b); +} + +// CHECK-RV32-LABEL: @v_pktt16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.pktt16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint16x2_t v_pktt16(uint16x2_t a, uint16x2_t b) { + return __rv__v_pktt16(a, b); +} + +// CHECK-RV32-LABEL: @radd8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.radd8.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long radd8(unsigned a, unsigned b) { + return __rv__radd8(a, b); +} + +// CHECK-RV32-LABEL: @v_radd8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <4 x i8> @llvm.riscv.radd8.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +int8x4_t v_radd8(int8x4_t a, int8x4_t b) { + return __rv__v_radd8(a, b); +} + +// CHECK-RV32-LABEL: @radd16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.radd16.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long radd16(unsigned long a, unsigned long b) { + return __rv__radd16(a, b); +} + +// CHECK-RV32-LABEL: @v_radd16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.radd16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +int16x2_t v_radd16(int16x2_t a, int16x2_t b) { + return __rv__v_radd16(a, b); +} + +// CHECK-RV32-LABEL: @raddw( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.raddw.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long raddw(int a, int b) { + return __rv__raddw(a, b); +} + +// CHECK-RV32-LABEL: @rcras16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.rcras16.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long rcras16(unsigned long a, unsigned long b) { + return __rv__rcras16(a, b); +} + +// CHECK-RV32-LABEL: @v_rcras16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.rcras16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +int16x2_t v_rcras16(int16x2_t a, int16x2_t b) { + return __rv__v_rcras16(a, b); +} + +// CHECK-RV32-LABEL: @rcrsa16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 
@llvm.riscv.rcrsa16.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long rcrsa16(unsigned long a, unsigned long b) { + return __rv__rcrsa16(a, b); +} + +// CHECK-RV32-LABEL: @v_rcrsa16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.rcrsa16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +int16x2_t v_rcrsa16(int16x2_t a, int16x2_t b) { + return __rv__v_rcrsa16(a, b); +} + +// CHECK-RV32-LABEL: @rstas16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.rstas16.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long rstas16(unsigned long a, unsigned long b) { + return __rv__rstas16(a, b); +} + +// CHECK-RV32-LABEL: @v_rstas16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.rstas16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +int16x2_t v_rstas16(int16x2_t a, int16x2_t b) { + return __rv__v_rstas16(a, b); +} + +// CHECK-RV32-LABEL: @rstsa16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.rstsa16.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long rstsa16(unsigned long a, unsigned long b) { + return __rv__rstsa16(a, b); +} + +// CHECK-RV32-LABEL: @v_rstsa16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.rstsa16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +int16x2_t v_rstsa16(int16x2_t a, int16x2_t b) { + return __rv__v_rstsa16(a, b); +} + +// CHECK-RV32-LABEL: @rsub8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.rsub8.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long rsub8(unsigned long a, unsigned long b) { + return __rv__rsub8(a, b); +} + +// CHECK-RV32-LABEL: @v_rsub8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <4 x i8> @llvm.riscv.rsub8.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +int8x4_t v_rsub8(int8x4_t a, int8x4_t b) { + return __rv__v_rsub8(a, b); +} + +// CHECK-RV32-LABEL: @rsub16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.rsub16.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long rsub16(unsigned long a, unsigned long b) { + return __rv__rsub16(a, b); +} + +// CHECK-RV32-LABEL: @v_rsub16( +// 
CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.rsub16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +int16x2_t v_rsub16(int16x2_t a, int16x2_t b) { + return __rv__v_rsub16(a, b); +} + +// CHECK-RV32-LABEL: @rsubw( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.rsubw.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long rsubw(int a, int b) { + return __rv__rsubw(a, b); +} + +// CHECK-RV32-LABEL: @sclip8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.sclip8.i32.i32(i32 [[A:%.*]], i32 5) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long sclip8(unsigned long a) { + return __rv__sclip8(a, 5); +} + +// CHECK-RV32-LABEL: @v_sclip8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call <4 x i8> @llvm.riscv.sclip8.v4i8.i32(<4 x i8> [[TMP0]], i32 5) +// CHECK-RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[TMP1]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +int8x4_t v_sclip8(int8x4_t a) { + return __rv__v_sclip8(a, 5); +} + +// CHECK-RV32-LABEL: @sclip16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.sclip16.i32.i32(i32 [[A:%.*]], i32 6) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long sclip16(unsigned long a) { + return __rv__sclip16(a, 6); +} + +// CHECK-RV32-LABEL: @v_sclip16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call <2 x i16> @llvm.riscv.sclip16.v2i16.i32(<2 x i16> [[TMP0]], i32 6) +// CHECK-RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[TMP1]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +int16x2_t v_sclip16(int16x2_t a) { + return __rv__v_sclip16(a, 6); +} + +// CHECK-RV32-LABEL: @sclip32( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.sclip32.i32.i32(i32 [[A:%.*]], i32 7) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long sclip32(long a) { + return __rv__sclip32(a, 7); +} + +// CHECK-RV32-LABEL: @scmple8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.scmple8.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long scmple8(unsigned long a, unsigned long b) { + return __rv__scmple8(a, b); +} + +// CHECK-RV32-LABEL: @v_scmple8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <4 x i8> @llvm.riscv.scmple8.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint8x4_t v_scmple8(int8x4_t a, int8x4_t b) { + return __rv__v_scmple8(a, b); +} + +// CHECK-RV32-LABEL: @scmple16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.scmple16.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long scmple16(unsigned long a, unsigned long b) { + return
__rv__scmple16(a, b); +} + +// CHECK-RV32-LABEL: @v_scmple16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.scmple16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint16x2_t v_scmple16(int16x2_t a, int16x2_t b) { + return __rv__v_scmple16(a, b); +} + +// CHECK-RV32-LABEL: @scmplt8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.scmplt8.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long scmplt8(unsigned long a, unsigned long b) { + return __rv__scmplt8(a, b); +} + +// CHECK-RV32-LABEL: @v_scmplt8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <4 x i8> @llvm.riscv.scmplt8.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint8x4_t v_scmplt8(int8x4_t a, int8x4_t b) { + return __rv__v_scmplt8(a, b); +} + +// CHECK-RV32-LABEL: @scmplt16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.scmplt16.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long scmplt16(unsigned long a, unsigned long b) { + return __rv__scmplt16(a, b); +} + +// CHECK-RV32-LABEL: @v_scmplt16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.scmplt16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint16x2_t v_scmplt16(int16x2_t a, int16x2_t b) { + return __rv__v_scmplt16(a, b); +} + +// CHECK-RV32-LABEL: @sll8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.sll8.i32.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long sll8(unsigned long a, unsigned int b) { + return __rv__sll8(a, b); +} + +// CHECK-RV32-LABEL: @v_sll8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call <4 x i8> @llvm.riscv.sll8.v4i8.i32(<4 x i8> [[TMP0]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[TMP1]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +uint8x4_t v_sll8(uint8x4_t a, unsigned int b) { + return __rv__v_sll8(a, b); +} + +// CHECK-RV32-LABEL: @sll16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.sll16.i32.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long sll16(unsigned long a, unsigned int b) { + return __rv__sll16(a, b); +} + +// CHECK-RV32-LABEL: @v_sll16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call <2 x i16> @llvm.riscv.sll16.v2i16.i32(<2 x i16> [[TMP0]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: 
[[TMP2:%.*]] = bitcast <2 x i16> [[TMP1]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +uint16x2_t v_sll16(uint16x2_t a, unsigned int b) { + return __rv__v_sll16(a, b); +} + +// CHECK-RV32-LABEL: @smaqa( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.smaqa.i32.i32(i32 [[T:%.*]], i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long smaqa(long t, unsigned long a, unsigned long b) { + return __rv__smaqa(t, a, b); +} + +// CHECK-RV32-LABEL: @v_smaqa( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.riscv.smaqa.i32.v4i8(i32 [[T:%.*]], <4 x i8> [[TMP0]], <4 x i8> [[TMP1]]) +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +int v_smaqa(int t, int8x4_t a, int8x4_t b) { + return __rv__v_smaqa(t, a, b); +} + +// CHECK-RV32-LABEL: @smaqa_su( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.smaqa.su.i32.i32(i32 [[T:%.*]], i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long smaqa_su(long t, unsigned long a, unsigned long b) { + return __rv__smaqa_su(t, a, b); +} + +// CHECK-RV32-LABEL: @v_smaqa_su( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.riscv.smaqa.su.i32.v4i8(i32 [[T:%.*]], <4 x i8> [[TMP0]], <4 x i8> [[TMP1]]) +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +int v_smaqa_su(int t, int8x4_t a, int8x4_t b) { + return __rv__v_smaqa_su(t, a, b); +} + +// CHECK-RV32-LABEL: @smax8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.smax8.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long smax8(unsigned long a, unsigned long b) { + return __rv__smax8(a, b); +} + +// CHECK-RV32-LABEL: @v_smax8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <4 x i8> @llvm.riscv.smax8.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +int8x4_t v_smax8(int8x4_t a, int8x4_t b) { + return __rv__v_smax8(a, b); +} + +// CHECK-RV32-LABEL: @smax16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.smax16.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long smax16(unsigned long a, unsigned long b) { + return __rv__smax16(a, b); +} + +// CHECK-RV32-LABEL: @v_smax16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.smax16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +int16x2_t v_smax16(int16x2_t a, int16x2_t b) { + return __rv__v_smax16(a, b); +} + +// CHECK-RV32-LABEL: @smbb16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.smbb16.i32.i32(i32 
[[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long smbb16(unsigned long a, unsigned long b) { + return __rv__smbb16(a, b); +} + +// CHECK-RV32-LABEL: @v_smbb16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.riscv.smbb16.i32.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +int v_smbb16(int16x2_t a, int16x2_t b) { + return __rv__v_smbb16(a, b); +} + +// CHECK-RV32-LABEL: @smbt16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.smbt16.i32.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long smbt16(unsigned long a, unsigned long b) { + return __rv__smbt16(a, b); +} + +// CHECK-RV32-LABEL: @v_smbt16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.riscv.smbt16.i32.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +int v_smbt16(int16x2_t a, int16x2_t b) { + return __rv__v_smbt16(a, b); +} + +// CHECK-RV32-LABEL: @smtt16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.smtt16.i32.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long smtt16(unsigned long a, unsigned long b) { + return __rv__smtt16(a, b); +} + +// CHECK-RV32-LABEL: @v_smtt16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.riscv.smtt16.i32.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +int v_smtt16(int16x2_t a, int16x2_t b) { + return __rv__v_smtt16(a, b); +} + +// CHECK-RV32-LABEL: @smds( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.smds.i32.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long smds(unsigned long a, unsigned long b) { + return __rv__smds(a, b); +} + +// CHECK-RV32-LABEL: @v_smds( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.riscv.smds.i32.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +int v_smds(int16x2_t a, int16x2_t b) { + return __rv__v_smds(a, b); +} + +// CHECK-RV32-LABEL: @smdrs( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.smdrs.i32.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long smdrs(unsigned long a, unsigned long b) { + return __rv__smdrs(a, b); +} + +// CHECK-RV32-LABEL: @v_smdrs( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.riscv.smdrs.i32.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +int v_smdrs(int16x2_t a, int16x2_t b) { + return 
__rv__v_smdrs(a, b); +} + +// CHECK-RV32-LABEL: @smxds( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.smxds.i32.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long smxds(unsigned long a, unsigned long b) { + return __rv__smxds(a, b); +} + +// CHECK-RV32-LABEL: @v_smxds( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.riscv.smxds.i32.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +int v_smxds(int16x2_t a, int16x2_t b) { + return __rv__v_smxds(a, b); +} + +// CHECK-RV32-LABEL: @smin8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.smin8.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long smin8(unsigned long a, unsigned long b) { + return __rv__smin8(a, b); +} + +// CHECK-RV32-LABEL: @v_smin8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <4 x i8> @llvm.riscv.smin8.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +int8x4_t v_smin8(int8x4_t a, int8x4_t b) { + return __rv__v_smin8(a, b); +} + +// CHECK-RV32-LABEL: @smin16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.smin16.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long smin16(unsigned long a, unsigned long b) { + return __rv__smin16(a, b); +} + +// CHECK-RV32-LABEL: @v_smin16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.smin16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +int16x2_t v_smin16(int16x2_t a, int16x2_t b) { + return __rv__v_smin16(a, b); +} + +// CHECK-RV32-LABEL: @smmul( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.smmul.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long smmul(long a, long b) { + return __rv__smmul(a, b); +} + +// CHECK-RV32-LABEL: @smmul_u( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.smmul.u.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long smmul_u(long a, long b) { + return __rv__smmul_u(a, b); +} + +// CHECK-RV32-LABEL: @smmwb( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.smmwb.i32.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long smmwb(long a, unsigned long b) { + return __rv__smmwb(a, b); +} + +// CHECK-RV32-LABEL: @v_smmwb( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.riscv.smmwb.i32.v2i16(i32 [[A:%.*]], <2 x i16> [[TMP0]]) +// CHECK-RV32-NEXT: ret i32 [[TMP1]] +// +int v_smmwb(int a, int16x2_t b) { + 
return __rv__v_smmwb(a, b); +} + +// CHECK-RV32-LABEL: @smmwb_u( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.smmwb.u.i32.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long smmwb_u(long a, unsigned long b) { + return __rv__smmwb_u(a, b); +} + +// CHECK-RV32-LABEL: @v_smmwb_u( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.riscv.smmwb.u.i32.v2i16(i32 [[A:%.*]], <2 x i16> [[TMP0]]) +// CHECK-RV32-NEXT: ret i32 [[TMP1]] +// +int v_smmwb_u(int a, int16x2_t b) { + return __rv__v_smmwb_u(a, b); +} + +// CHECK-RV32-LABEL: @smmwt( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.smmwt.i32.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long smmwt(long a, unsigned long b) { + return __rv__smmwt(a, b); +} + +// CHECK-RV32-LABEL: @v_smmwt( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.riscv.smmwt.i32.v2i16(i32 [[A:%.*]], <2 x i16> [[TMP0]]) +// CHECK-RV32-NEXT: ret i32 [[TMP1]] +// +int v_smmwt(int a, int16x2_t b) { + return __rv__v_smmwt(a, b); +} + +// CHECK-RV32-LABEL: @smmwt_u( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.smmwt.u.i32.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long smmwt_u(long a, unsigned long b) { + return __rv__smmwt_u(a, b); +} + +// CHECK-RV32-LABEL: @v_smmwt_u( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.riscv.smmwt.u.i32.v2i16(i32 [[A:%.*]], <2 x i16> [[TMP0]]) +// CHECK-RV32-NEXT: ret i32 [[TMP1]] +// +int v_smmwt_u(int a, int16x2_t b) { + return __rv__v_smmwt_u(a, b); +} + +// CHECK-RV32-LABEL: @sra_u( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.sra.u.i32.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long sra_u(long a, unsigned int b) { + return __rv__sra_u(a, b); +} + +// CHECK-RV32-LABEL: @sra8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.sra8.i32.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long sra8(unsigned long a, unsigned int b) { + return __rv__sra8(a, b); +} + +// CHECK-RV32-LABEL: @v_sra8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call <4 x i8> @llvm.riscv.sra8.v4i8.i32(<4 x i8> [[TMP0]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[TMP1]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +int8x4_t v_sra8(int8x4_t a, unsigned int b) { + return __rv__v_sra8(a, b); +} + +// CHECK-RV32-LABEL: @sra8_u( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.sra8.u.i32.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long sra8_u(unsigned long a, unsigned int b) { + return __rv__sra8_u(a, b); +} + +// CHECK-RV32-LABEL: @v_sra8_u( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call <4 x i8> @llvm.riscv.sra8.u.v4i8.i32(<4 x i8> [[TMP0]], i32 [[B:%.*]]) 
+// CHECK-RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[TMP1]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +int8x4_t v_sra8_u(int8x4_t a, unsigned int b) { + return __rv__v_sra8_u(a, b); +} + +// CHECK-RV32-LABEL: @sra16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.sra16.i32.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long sra16(unsigned long a, unsigned int b) { + return __rv__sra16(a, b); +} + +// CHECK-RV32-LABEL: @v_sra16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call <2 x i16> @llvm.riscv.sra16.v2i16.i32(<2 x i16> [[TMP0]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[TMP1]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +int16x2_t v_sra16(int16x2_t a, unsigned int b) { + return __rv__v_sra16(a, b); +} + +// CHECK-RV32-LABEL: @sra16_u( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.sra16.u.i32.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long sra16_u(unsigned long a, unsigned int b) { + return __rv__sra16_u(a, b); +} + +// CHECK-RV32-LABEL: @v_sra16_u( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call <2 x i16> @llvm.riscv.sra16.u.v2i16.i32(<2 x i16> [[TMP0]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[TMP1]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +int16x2_t v_sra16_u(int16x2_t a, unsigned int b) { + return __rv__v_sra16_u(a, b); +} + +// CHECK-RV32-LABEL: @srl8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.srl8.i32.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long srl8(unsigned long a, unsigned int b) { + return __rv__srl8(a, b); +} + +// CHECK-RV32-LABEL: @v_srl8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call <4 x i8> @llvm.riscv.srl8.v4i8.i32(<4 x i8> [[TMP0]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[TMP1]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +int8x4_t v_srl8(int8x4_t a, unsigned int b) { + return __rv__v_srl8(a, b); +} + +// CHECK-RV32-LABEL: @srl8_u( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.srl8.u.i32.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long srl8_u(unsigned long a, unsigned int b) { + return __rv__srl8_u(a, b); +} + +// CHECK-RV32-LABEL: @v_srl8_u( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call <4 x i8> @llvm.riscv.srl8.u.v4i8.i32(<4 x i8> [[TMP0]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[TMP1]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +int8x4_t v_srl8_u(int8x4_t a, unsigned int b) { + return __rv__v_srl8_u(a, b); +} + +// CHECK-RV32-LABEL: @srl16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.srl16.i32.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long srl16(unsigned long a, unsigned int b) { + return __rv__srl16(a, b); +} + +// CHECK-RV32-LABEL: @v_srl16( +// CHECK-RV32-NEXT: entry: +// 
CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call <2 x i16> @llvm.riscv.srl16.v2i16.i32(<2 x i16> [[TMP0]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[TMP1]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +int16x2_t v_srl16(int16x2_t a, unsigned int b) { + return __rv__v_srl16(a, b); +} + +// CHECK-RV32-LABEL: @srl16_u( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.srl16.u.i32.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long srl16_u(unsigned long a, unsigned int b) { + return __rv__srl16_u(a, b); +} + +// CHECK-RV32-LABEL: @v_srl16_u( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call <2 x i16> @llvm.riscv.srl16.u.v2i16.i32(<2 x i16> [[TMP0]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[TMP1]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +int16x2_t v_srl16_u(int16x2_t a, unsigned int b) { + return __rv__v_srl16_u(a, b); +} + +// CHECK-RV32-LABEL: @stas16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.stas16.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long stas16(unsigned long a, unsigned long b) { + return __rv__stas16(a, b); +} + +// CHECK-RV32-LABEL: @v_ustas16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.stas16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint16x2_t v_ustas16(uint16x2_t a, uint16x2_t b) { + return __rv__v_ustas16(a, b); +} + +// CHECK-RV32-LABEL: @v_sstas16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.stas16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +int16x2_t v_sstas16(int16x2_t a, int16x2_t b) { + return __rv__v_sstas16(a, b); +} + +// CHECK-RV32-LABEL: @stsa16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.stsa16.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long stsa16(unsigned long a, unsigned long b) { + return __rv__stsa16(a, b); +} + +// CHECK-RV32-LABEL: @v_ustsa16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.stsa16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint16x2_t v_ustsa16(uint16x2_t a, uint16x2_t b) { + return __rv__v_ustsa16(a, b); +} + +// CHECK-RV32-LABEL: @v_sstsa16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 
[[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.stsa16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +int16x2_t v_sstsa16(int16x2_t a, int16x2_t b) { + return __rv__v_sstsa16(a, b); +} + +// CHECK-RV32-LABEL: @sub8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.sub8.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long sub8(unsigned long a, unsigned long b) { + return __rv__sub8(a, b); +} + +// CHECK-RV32-LABEL: @v_usub8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <4 x i8> @llvm.riscv.sub8.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint8x4_t v_usub8(uint8x4_t a, uint8x4_t b) { + return __rv__v_usub8(a, b); +} + +// CHECK-RV32-LABEL: @v_ssub8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <4 x i8> @llvm.riscv.sub8.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +int8x4_t v_ssub8(int8x4_t a, int8x4_t b) { + return __rv__v_ssub8(a, b); +} + +// CHECK-RV32-LABEL: @sub16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.sub16.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long sub16(unsigned long a, unsigned long b) { + return __rv__sub16(a, b); +} + +// CHECK-RV32-LABEL: @v_usub16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.sub16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint16x2_t v_usub16(uint16x2_t a, uint16x2_t b) { + return __rv__v_usub16(a, b); +} + +// CHECK-RV32-LABEL: @v_ssub16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.sub16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +int16x2_t v_ssub16(int16x2_t a, int16x2_t b) { + return __rv__v_ssub16(a, b); +} + +// CHECK-RV32-LABEL: @sunpkd810( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.sunpkd810.i32(i32 [[A:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long sunpkd810(unsigned long a) { + return __rv__sunpkd810(a); +} + +// CHECK-RV32-LABEL: @v_sunpkd810( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call <2 x i16> @llvm.riscv.v.sunpkd810.v2i16(<4 x i8> 
[[TMP0]]) +// CHECK-RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[TMP1]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +int16x2_t v_sunpkd810(int8x4_t a) { + return __rv__v_sunpkd810(a); +} + +// CHECK-RV32-LABEL: @sunpkd820( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.sunpkd820.i32(i32 [[A:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long sunpkd820(unsigned long a) { + return __rv__sunpkd820(a); +} + +// CHECK-RV32-LABEL: @v_sunpkd820( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call <2 x i16> @llvm.riscv.v.sunpkd820.v2i16(<4 x i8> [[TMP0]]) +// CHECK-RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[TMP1]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +int16x2_t v_sunpkd820(int8x4_t a) { + return __rv__v_sunpkd820(a); +} + +// CHECK-RV32-LABEL: @sunpkd830( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.sunpkd830.i32(i32 [[A:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long sunpkd830(unsigned long a) { + return __rv__sunpkd830(a); +} + +// CHECK-RV32-LABEL: @v_sunpkd830( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call <2 x i16> @llvm.riscv.v.sunpkd830.v2i16(<4 x i8> [[TMP0]]) +// CHECK-RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[TMP1]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +int16x2_t v_sunpkd830(int8x4_t a) { + return __rv__v_sunpkd830(a); +} + +// CHECK-RV32-LABEL: @sunpkd831( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.sunpkd831.i32(i32 [[A:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long sunpkd831(unsigned long a) { + return __rv__sunpkd831(a); +} + +// CHECK-RV32-LABEL: @v_sunpkd831( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call <2 x i16> @llvm.riscv.v.sunpkd831.v2i16(<4 x i8> [[TMP0]]) +// CHECK-RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[TMP1]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +int16x2_t v_sunpkd831(int8x4_t a) { + return __rv__v_sunpkd831(a); +} + +// CHECK-RV32-LABEL: @sunpkd832( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.sunpkd832.i32(i32 [[A:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long sunpkd832(unsigned long a) { + return __rv__sunpkd832(a); +} + +// CHECK-RV32-LABEL: @v_sunpkd832( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call <2 x i16> @llvm.riscv.v.sunpkd832.v2i16(<4 x i8> [[TMP0]]) +// CHECK-RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[TMP1]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +int16x2_t v_sunpkd832(int8x4_t a) { + return __rv__v_sunpkd832(a); +} + +// CHECK-RV32-LABEL: @swap8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.swap8.i32(i32 [[A:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long swap8(unsigned long a) { + return __rv__swap8(a); +} + +// CHECK-RV32-LABEL: @v_swap8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call <4 x i8> @llvm.riscv.swap8.v4i8(<4 x i8> [[TMP0]]) +// CHECK-RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> 
[[TMP1]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +uint8x4_t v_swap8(uint8x4_t a) { + return __rv__v_swap8(a); +} + +// CHECK-RV32-LABEL: @swap16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.swap16.i32(i32 [[A:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long swap16(unsigned long a) { + return __rv__swap16(a); +} + +// CHECK-RV32-LABEL: @v_swap16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call <2 x i16> @llvm.riscv.swap16.v2i16(<2 x i16> [[TMP0]]) +// CHECK-RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[TMP1]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +uint16x2_t v_swap16(uint16x2_t a) { + return __rv__v_swap16(a); +} + +// CHECK-RV32-LABEL: @uclip8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.uclip8.i32.i32(i32 [[A:%.*]], i32 5) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long uclip8(unsigned long a) { + return __rv__uclip8(a, 5); +} + +// CHECK-RV32-LABEL: @v_uclip8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call <4 x i8> @llvm.riscv.uclip8.v4i8.i32(<4 x i8> [[TMP0]], i32 5) +// CHECK-RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[TMP1]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +uint8x4_t v_uclip8(uint8x4_t a) { + return __rv__v_uclip8(a, 5); +} + +// CHECK-RV32-LABEL: @uclip16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.uclip16.i32.i32(i32 [[A:%.*]], i32 6) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long uclip16(unsigned long a) { + return __rv__uclip16(a, 6); +} + +// CHECK-RV32-LABEL: @v_uclip16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call <2 x i16> @llvm.riscv.uclip16.v2i16.i32(<2 x i16> [[TMP0]], i32 6) +// CHECK-RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[TMP1]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +uint16x2_t v_uclip16(uint16x2_t a) { + return __rv__v_uclip16(a, 6); +} + +// CHECK-RV32-LABEL: @uclip32( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.uclip32.i32.i32(i32 [[A:%.*]], i32 7) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +long uclip32(long a) { + return __rv__uclip32(a, 7); +} + +// CHECK-RV32-LABEL: @ucmple8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.ucmple8.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long ucmple8(unsigned long a, unsigned long b) { + return __rv__ucmple8(a, b); +} + +// CHECK-RV32-LABEL: @v_ucmple8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <4 x i8> @llvm.riscv.ucmple8.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint8x4_t v_ucmple8(uint8x4_t a, uint8x4_t b) { + return __rv__v_ucmple8(a, b); +} + +// CHECK-RV32-LABEL: @ucmple16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.ucmple16.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long 
ucmple16(unsigned long a, unsigned long b) { + return __rv__ucmple16(a, b); +} + +// CHECK-RV32-LABEL: @v_ucmple16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.ucmple16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint16x2_t v_ucmple16(uint16x2_t a, uint16x2_t b) { + return __rv__v_ucmple16(a, b); +} + +// CHECK-RV32-LABEL: @ucmplt8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.ucmplt8.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long ucmplt8(unsigned long a, unsigned long b) { + return __rv__ucmplt8(a, b); +} + +// CHECK-RV32-LABEL: @v_ucmplt8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <4 x i8> @llvm.riscv.ucmplt8.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint8x4_t v_ucmplt8(uint8x4_t a, uint8x4_t b) { + return __rv__v_ucmplt8(a, b); +} + +// CHECK-RV32-LABEL: @ucmplt16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.ucmplt16.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long ucmplt16(unsigned long a, unsigned long b) { + return __rv__ucmplt16(a, b); +} + +// CHECK-RV32-LABEL: @v_ucmplt16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.ucmplt16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint16x2_t v_ucmplt16(uint16x2_t a, uint16x2_t b) { + return __rv__v_ucmplt16(a, b); +} + +// CHECK-RV32-LABEL: @ukadd8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.ukadd8.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long ukadd8(unsigned long a, unsigned long b) { + return __rv__ukadd8(a, b); +} + +// CHECK-RV32-LABEL: @v_ukadd8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <4 x i8> @llvm.riscv.ukadd8.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint8x4_t v_ukadd8(uint8x4_t a, uint8x4_t b) { + return __rv__v_ukadd8(a, b); +} + +// CHECK-RV32-LABEL: @ukadd16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.ukadd16.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long ukadd16(unsigned long a, unsigned long b) { + return __rv__ukadd16(a, b); +} + +// CHECK-RV32-LABEL: @v_ukadd16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 
[[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.ukadd16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint16x2_t v_ukadd16(uint16x2_t a, uint16x2_t b) { + return __rv__v_ukadd16(a, b); +} + +// CHECK-RV32-LABEL: @ukaddh( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.ukaddh.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long ukaddh(unsigned int a, unsigned int b) { + return __rv__ukaddh(a, b); +} + +// CHECK-RV32-LABEL: @ukaddw( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.ukaddw.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long ukaddw(unsigned int a, unsigned int b) { + return __rv__ukaddw(a, b); +} + +// CHECK-RV32-LABEL: @ukcras16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.ukcras16.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long ukcras16(unsigned long a, unsigned long b) { + return __rv__ukcras16(a, b); +} + +// CHECK-RV32-LABEL: @v_ukcras16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.ukcras16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint16x2_t v_ukcras16(uint16x2_t a, uint16x2_t b) { + return __rv__v_ukcras16(a, b); +} + +// CHECK-RV32-LABEL: @ukcrsa16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.ukcrsa16.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long ukcrsa16(unsigned long a, unsigned long b) { + return __rv__ukcrsa16(a, b); +} + +// CHECK-RV32-LABEL: @v_ukcrsa16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.ukcrsa16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint16x2_t v_ukcrsa16(uint16x2_t a, uint16x2_t b) { + return __rv__v_ukcrsa16(a, b); +} + +// CHECK-RV32-LABEL: @ukstas16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.ukstas16.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long ukstas16(unsigned long a, unsigned long b) { + return __rv__ukstas16(a, b); +} + +// CHECK-RV32-LABEL: @v_ukstas16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.ukstas16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint16x2_t v_ukstas16(uint16x2_t a, uint16x2_t b) { + return __rv__v_ukstas16(a, b); +} + +// CHECK-RV32-LABEL: @ukstsa16( +//
CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.ukstsa16.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long ukstsa16(unsigned long a, unsigned long b) { + return __rv__ukstsa16(a, b); +} + +// CHECK-RV32-LABEL: @v_ukstsa16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.ukstsa16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint16x2_t v_ukstsa16(uint16x2_t a, uint16x2_t b) { + return __rv__v_ukstsa16(a, b); +} + +// CHECK-RV32-LABEL: @uksub8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.uksub8.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long uksub8(unsigned long a, unsigned long b) { + return __rv__uksub8(a, b); +} + +// CHECK-RV32-LABEL: @v_uksub8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <4 x i8> @llvm.riscv.uksub8.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint8x4_t v_uksub8(uint8x4_t a, uint8x4_t b) { + return __rv__v_uksub8(a, b); +} + +// CHECK-RV32-LABEL: @uksub16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.uksub16.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long uksub16(unsigned long a, unsigned long b) { + return __rv__uksub16(a, b); +} + +// CHECK-RV32-LABEL: @v_uksub16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.uksub16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint16x2_t v_uksub16(uint16x2_t a, uint16x2_t b) { + return __rv__v_uksub16(a, b); +} + +// CHECK-RV32-LABEL: @uksubh( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.uksubh.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long uksubh(unsigned int a, unsigned int b) { + return __rv__uksubh(a, b); +} + +// CHECK-RV32-LABEL: @uksubw( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.uksubw.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long uksubw(unsigned int a, unsigned int b) { + return __rv__uksubw(a, b); +} + +// CHECK-RV32-LABEL: @umaqa( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.umaqa.i32.i32(i32 [[T:%.*]], i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long umaqa(unsigned long t, unsigned long a, unsigned long b) { + return __rv__umaqa(t, a, b); +} + +// CHECK-RV32-LABEL: @v_umaqa( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: 
[[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.riscv.umaqa.i32.v4i8(i32 [[T:%.*]], <4 x i8> [[TMP0]], <4 x i8> [[TMP1]]) +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +unsigned int v_umaqa(unsigned int t, uint8x4_t a, uint8x4_t b) { + return __rv__v_umaqa(t, a, b); +} + +// CHECK-RV32-LABEL: @umax8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.umax8.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long umax8(unsigned long a, unsigned long b) { + return __rv__umax8(a, b); +} + +// CHECK-RV32-LABEL: @v_umax8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <4 x i8> @llvm.riscv.umax8.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint8x4_t v_umax8(uint8x4_t a, uint8x4_t b) { + return __rv__v_umax8(a, b); +} + +// CHECK-RV32-LABEL: @umax16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.umax16.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long umax16(unsigned long a, unsigned long b) { + return __rv__umax16(a, b); +} + +// CHECK-RV32-LABEL: @v_umax16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.umax16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint16x2_t v_umax16(uint16x2_t a, uint16x2_t b) { + return __rv__v_umax16(a, b); +} + +// CHECK-RV32-LABEL: @umin8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.umin8.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long umin8(unsigned long a, unsigned long b) { + return __rv__umin8(a, b); +} + +// CHECK-RV32-LABEL: @v_umin8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <4 x i8> @llvm.riscv.umin8.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint8x4_t v_umin8(uint8x4_t a, uint8x4_t b) { + return __rv__v_umin8(a, b); +} + +// CHECK-RV32-LABEL: @umin16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.umin16.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long umin16(unsigned long a, unsigned long b) { + return __rv__umin16(a, b); +} + +// CHECK-RV32-LABEL: @v_umin16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.umin16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint16x2_t 
v_umin16(uint16x2_t a, uint16x2_t b) { + return __rv__v_umin16(a, b); +} + +// CHECK-RV32-LABEL: @uradd8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.uradd8.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long uradd8(unsigned long a, unsigned long b) { + return __rv__uradd8(a, b); +} + +// CHECK-RV32-LABEL: @v_uradd8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <4 x i8> @llvm.riscv.uradd8.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint8x4_t v_uradd8(uint8x4_t a, uint8x4_t b) { + return __rv__v_uradd8(a, b); +} + +// CHECK-RV32-LABEL: @uradd16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.uradd16.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long uradd16(unsigned long a, unsigned long b) { + return __rv__uradd16(a, b); +} + +// CHECK-RV32-LABEL: @v_uradd16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.uradd16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint16x2_t v_uradd16(uint16x2_t a, uint16x2_t b) { + return __rv__v_uradd16(a, b); +} + +// CHECK-RV32-LABEL: @uraddw( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.uraddw.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long uraddw(unsigned int a, unsigned int b) { + return __rv__uraddw(a, b); +} + +// CHECK-RV32-LABEL: @urcras16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.urcras16.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long urcras16(unsigned long a, unsigned long b) { + return __rv__urcras16(a, b); +} + +// CHECK-RV32-LABEL: @v_urcras16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.urcras16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint16x2_t v_urcras16(uint16x2_t a, uint16x2_t b) { + return __rv__v_urcras16(a, b); +} + +// CHECK-RV32-LABEL: @urcrsa16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.urcrsa16.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long urcrsa16(unsigned long a, unsigned long b) { + return __rv__urcrsa16(a, b); +} + +// CHECK-RV32-LABEL: @v_urcrsa16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.urcrsa16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: 
[[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint16x2_t v_urcrsa16(uint16x2_t a, uint16x2_t b) { + return __rv__v_urcrsa16(a, b); +} + +// CHECK-RV32-LABEL: @urstas16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.urstas16.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long urstas16(unsigned long a, unsigned long b) { + return __rv__urstas16(a, b); +} + +// CHECK-RV32-LABEL: @v_urstas16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.urstas16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint16x2_t v_urstas16(uint16x2_t a, uint16x2_t b) { + return __rv__v_urstas16(a, b); +} + +// CHECK-RV32-LABEL: @urstsa16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.urstsa16.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long urstsa16(unsigned long a, unsigned long b) { + return __rv__urstsa16(a, b); +} + +// CHECK-RV32-LABEL: @v_urstsa16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.urstsa16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint16x2_t v_urstsa16(uint16x2_t a, uint16x2_t b) { + return __rv__v_urstsa16(a, b); +} + +// CHECK-RV32-LABEL: @ursub8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.ursub8.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long ursub8(unsigned long a, unsigned long b) { + return __rv__ursub8(a, b); +} + +// CHECK-RV32-LABEL: @v_ursub8( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <4 x i8> @llvm.riscv.ursub8.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint8x4_t v_ursub8(uint8x4_t a, uint8x4_t b) { + return __rv__v_ursub8(a, b); +} + +// CHECK-RV32-LABEL: @ursub16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.ursub16.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long ursub16(unsigned long a, unsigned long b) { + return __rv__ursub16(a, b); +} + +// CHECK-RV32-LABEL: @v_ursub16( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16> +// CHECK-RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.ursub16.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// CHECK-RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP3]] +// +uint16x2_t v_ursub16(uint16x2_t a, uint16x2_t b) { + return __rv__v_ursub16(a, b); +} + +// 
CHECK-RV32-LABEL: @ursubw( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.ursubw.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long ursubw(unsigned int a, unsigned int b) { + return __rv__ursubw(a, b); +} + +// CHECK-RV32-LABEL: @zunpkd810( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.zunpkd810.i32(i32 [[A:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long zunpkd810(unsigned long a) { + return __rv__zunpkd810(a); +} + +// CHECK-RV32-LABEL: @v_zunpkd810( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call <2 x i16> @llvm.riscv.v.zunpkd810.v2i16(<4 x i8> [[TMP0]]) +// CHECK-RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[TMP1]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +uint16x2_t v_zunpkd810(uint8x4_t a) { + return __rv__v_zunpkd810(a); +} + +// CHECK-RV32-LABEL: @zunpkd820( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.zunpkd820.i32(i32 [[A:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long zunpkd820(unsigned long a) { + return __rv__zunpkd820(a); +} + +// CHECK-RV32-LABEL: @v_zunpkd820( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call <2 x i16> @llvm.riscv.v.zunpkd820.v2i16(<4 x i8> [[TMP0]]) +// CHECK-RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[TMP1]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +uint16x2_t v_zunpkd820(uint8x4_t a) { + return __rv__v_zunpkd820(a); +} + +// CHECK-RV32-LABEL: @zunpkd830( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.zunpkd830.i32(i32 [[A:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long zunpkd830(unsigned long a) { + return __rv__zunpkd830(a); +} + +// CHECK-RV32-LABEL: @v_zunpkd830( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call <2 x i16> @llvm.riscv.v.zunpkd830.v2i16(<4 x i8> [[TMP0]]) +// CHECK-RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[TMP1]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +uint16x2_t v_zunpkd830(uint8x4_t a) { + return __rv__v_zunpkd830(a); +} + +// CHECK-RV32-LABEL: @zunpkd831( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.zunpkd831.i32(i32 [[A:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long zunpkd831(unsigned long a) { + return __rv__zunpkd831(a); +} + +// CHECK-RV32-LABEL: @v_zunpkd831( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: [[TMP1:%.*]] = tail call <2 x i16> @llvm.riscv.v.zunpkd831.v2i16(<4 x i8> [[TMP0]]) +// CHECK-RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[TMP1]] to i32 +// CHECK-RV32-NEXT: ret i32 [[TMP2]] +// +uint16x2_t v_zunpkd831(uint8x4_t a) { + return __rv__v_zunpkd831(a); +} + +// CHECK-RV32-LABEL: @zunpkd832( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.riscv.zunpkd832.i32(i32 [[A:%.*]]) +// CHECK-RV32-NEXT: ret i32 [[TMP0]] +// +unsigned long zunpkd832(unsigned long a) { + return __rv__zunpkd832(a); +} + +// CHECK-RV32-LABEL: @v_zunpkd832( +// CHECK-RV32-NEXT: entry: +// CHECK-RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8> +// CHECK-RV32-NEXT: 
[[TMP1:%.*]] = tail call <2 x i16> @llvm.riscv.v.zunpkd832.v2i16(<4 x i8> [[TMP0]])
+// CHECK-RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[TMP1]] to i32
+// CHECK-RV32-NEXT: ret i32 [[TMP2]]
+//
+uint16x2_t v_zunpkd832(uint8x4_t a) {
+  return __rv__v_zunpkd832(a);
+}
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/rv64p.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/rv64p.c
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/rv64p.c
@@ -0,0 +1,3906 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -O2 -target-feature +experimental-p \
+// RUN:   -emit-llvm %s -o - | FileCheck --check-prefix=CHECK-RV64 %s
+
+typedef signed char int8x4_t __attribute__((vector_size(4)));
+typedef signed char int8x8_t __attribute__((vector_size(8)));
+typedef short int16x2_t __attribute__((vector_size(4)));
+typedef short int16x4_t __attribute__((vector_size(8)));
+typedef short int16x8_t __attribute__((vector_size(16)));
+typedef int int32x2_t __attribute__((vector_size(8)));
+typedef int int32x4_t __attribute__((vector_size(16)));
+typedef unsigned char uint8x4_t __attribute__((vector_size(4)));
+typedef unsigned char uint8x8_t __attribute__((vector_size(8)));
+typedef unsigned short uint16x2_t __attribute__((vector_size(4)));
+typedef unsigned short uint16x4_t __attribute__((vector_size(8)));
+typedef unsigned short uint16x8_t __attribute__((vector_size(16)));
+typedef unsigned int uint32x2_t __attribute__((vector_size(8)));
+typedef unsigned int uint32x4_t __attribute__((vector_size(16)));
+
+// CHECK-RV64-LABEL: @add8(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.add8.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+// CHECK-RV64-NEXT: ret i64 [[TMP0]]
+//
+unsigned long add8(unsigned long a, unsigned long b) {
+  return __rv__add8(a, b);
+}
+
+// CHECK-RV64-LABEL: @v_uadd8(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8>
+// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <8 x i8>
+// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <8 x i8> @llvm.riscv.add8.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64
+// CHECK-RV64-NEXT: ret i64 [[TMP3]]
+//
+uint8x8_t v_uadd8(uint8x8_t a, uint8x8_t b) {
+  return __rv__v_uadd8(a, b);
+}
+
+// CHECK-RV64-LABEL: @v_sadd8(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8>
+// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <8 x i8>
+// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <8 x i8> @llvm.riscv.add8.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64
+// CHECK-RV64-NEXT: ret i64 [[TMP3]]
+//
+int8x8_t v_sadd8(int8x8_t a, int8x8_t b) {
+  return __rv__v_sadd8(a, b);
+}
+
+// CHECK-RV64-LABEL: @add16(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.add16.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+// CHECK-RV64-NEXT: ret i64 [[TMP0]]
+//
+unsigned long add16(unsigned long a, unsigned long b) {
+  return __rv__add16(a, b);
+}
+
+// CHECK-RV64-LABEL: @v_uadd16(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16>
+// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16>
+// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.add16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64
+// CHECK-RV64-NEXT: ret i64 [[TMP3]]
+//
+uint16x4_t v_uadd16(uint16x4_t a, uint16x4_t b) {
+  return __rv__v_uadd16(a, b);
+}
+
+// CHECK-RV64-LABEL: @v_sadd16(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16>
+// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16>
+// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.add16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64
+// CHECK-RV64-NEXT: ret i64 [[TMP3]]
+//
+int16x4_t v_sadd16(int16x4_t a, int16x4_t b) {
+  return __rv__v_sadd16(a, b);
+}
+
+// CHECK-RV64-LABEL: @ave(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.ave.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+// CHECK-RV64-NEXT: ret i64 [[TMP0]]
+//
+long ave(long a, long b) {
+  return __rv__ave(a, b);
+}
+
+// CHECK-RV64-LABEL: @bitrev(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.bitrev.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+// CHECK-RV64-NEXT: ret i64 [[TMP0]]
+//
+unsigned long bitrev(unsigned long a, unsigned long b) {
+  return __rv__bitrev(a, b);
+}
+
+// CHECK-RV64-LABEL: @bpick(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.bpick.i64(i64 [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]])
+// CHECK-RV64-NEXT: ret i64 [[TMP0]]
+//
+unsigned long bpick(unsigned long a, unsigned long b, unsigned long c) {
+  return __rv__bpick(a, b, c);
+}
+
+// CHECK-RV64-LABEL: @clrs8(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.clrs8.i64(i64 [[A:%.*]])
+// CHECK-RV64-NEXT: ret i64 [[TMP0]]
+//
+unsigned long clrs8(unsigned long a) {
+  return __rv__clrs8(a);
+}
+
+// CHECK-RV64-LABEL: @v_clrs8(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8>
+// CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <8 x i8> @llvm.riscv.clrs8.v8i8(<8 x i8> [[TMP0]])
+// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to i64
+// CHECK-RV64-NEXT: ret i64 [[TMP2]]
+//
+uint8x8_t v_clrs8(int8x8_t a) {
+  return __rv__v_clrs8(a);
+}
+
+// CHECK-RV64-LABEL: @clrs16(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.clrs16.i64(i64 [[A:%.*]])
+// CHECK-RV64-NEXT: ret i64 [[TMP0]]
+//
+unsigned long clrs16(unsigned long a) {
+  return __rv__clrs16(a);
+}
+
+// CHECK-RV64-LABEL: @v_clrs16(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16>
+// CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <4 x i16> @llvm.riscv.clrs16.v4i16(<4 x i16> [[TMP0]])
+// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to i64
+// CHECK-RV64-NEXT: ret i64 [[TMP2]]
+//
+uint16x4_t v_clrs16(int16x4_t a) {
+  return __rv__v_clrs16(a);
+}
+
+// CHECK-RV64-LABEL: @clrs32(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.clrs32.i64(i64 [[A:%.*]])
+// CHECK-RV64-NEXT: ret i64 [[TMP0]]
+//
+unsigned long clrs32(unsigned long a) {
+  return __rv__clrs32(a);
+}
+
+// CHECK-RV64-LABEL: @v_clrs32(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <2 x i32>
+// CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <2 x i32> @llvm.riscv.clrs32.v2i32(<2 x i32> [[TMP0]])
+// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]]
to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +uint32x2_t v_clrs32(int32x2_t a) { + return __rv__v_clrs32(a); +} + +// CHECK-RV64-LABEL: @clo8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.clo8.i64(i64 [[A:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long clo8(unsigned long a) { + return __rv__clo8(a); +} + +// CHECK-RV64-LABEL: @v_clo8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <8 x i8> @llvm.riscv.clo8.v8i8(<8 x i8> [[TMP0]]) +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +uint8x8_t v_clo8(uint8x8_t a) { + return __rv__v_clo8(a); +} + +// CHECK-RV64-LABEL: @clo16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.clo16.i64(i64 [[A:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long clo16(unsigned long a) { + return __rv__clo16(a); +} + +// CHECK-RV64-LABEL: @v_clo16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <4 x i16> @llvm.riscv.clo16.v4i16(<4 x i16> [[TMP0]]) +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +uint16x4_t v_clo16(uint16x4_t a) { + return __rv__v_clo16(a); +} + +// CHECK-RV64-LABEL: @clo32( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.clo32.i64(i64 [[A:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long clo32(unsigned long a) { + return __rv__clo32(a); +} + +// CHECK-RV64-LABEL: @v_clo32( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <2 x i32> @llvm.riscv.clo32.v2i32(<2 x i32> [[TMP0]]) +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +uint32x2_t v_clo32(uint32x2_t a) { + return __rv__v_clo32(a); +} + +// CHECK-RV64-LABEL: @clz8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.clz8.i64(i64 [[A:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long clz8(unsigned long a) { + return __rv__clz8(a); +} + +// CHECK-RV64-LABEL: @v_clz8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <8 x i8> @llvm.riscv.clz8.v8i8(<8 x i8> [[TMP0]]) +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +uint8x8_t v_clz8(uint8x8_t a) { + return __rv__v_clz8(a); +} + +// CHECK-RV64-LABEL: @clz16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.clz16.i64(i64 [[A:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long clz16(unsigned long a) { + return __rv__clz16(a); +} + +// CHECK-RV64-LABEL: @v_clz16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <4 x i16> @llvm.riscv.clz16.v4i16(<4 x i16> [[TMP0]]) +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +uint16x4_t v_clz16(uint16x4_t a) { + return __rv__v_clz16(a); +} + +// CHECK-RV64-LABEL: @clz32( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] 
= tail call i64 @llvm.riscv.clz32.i64(i64 [[A:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long clz32(unsigned long a) { + return __rv__clz32(a); +} + +// CHECK-RV64-LABEL: @v_clz32( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <2 x i32> @llvm.riscv.clz32.v2i32(<2 x i32> [[TMP0]]) +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +uint32x2_t v_clz32(uint32x2_t a) { + return __rv__v_clz32(a); +} + +// CHECK-RV64-LABEL: @cmpeq8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.cmpeq8.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long cmpeq8(unsigned long a, unsigned long b) { + return __rv__cmpeq8(a, b); +} + +// CHECK-RV64-LABEL: @v_scmpeq8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <8 x i8> @llvm.riscv.cmpeq8.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +uint8x8_t v_scmpeq8(int8x8_t a, int8x8_t b) { + return __rv__v_scmpeq8(a, b); +} + +// CHECK-RV64-LABEL: @v_ucmpeq8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <8 x i8> @llvm.riscv.cmpeq8.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +uint8x8_t v_ucmpeq8(uint8x8_t a, uint8x8_t b) { + return __rv__v_ucmpeq8(a, b); +} + +// CHECK-RV64-LABEL: @cmpeq16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.cmpeq16.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long cmpeq16(unsigned long a, unsigned long b) { + return __rv__cmpeq16(a, b); +} + +// CHECK-RV64-LABEL: @v_scmpeq16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.cmpeq16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +uint16x4_t v_scmpeq16(int16x4_t a, int16x4_t b) { + return __rv__v_scmpeq16(a, b); +} + +// CHECK-RV64-LABEL: @v_ucmpeq16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.cmpeq16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +uint16x4_t v_ucmpeq16(uint16x4_t a, uint16x4_t b) { + return __rv__v_ucmpeq16(a, b); +} + +// CHECK-RV64-LABEL: @cras16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.cras16.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long 
cras16(unsigned long a, unsigned long b) { + return __rv__cras16(a, b); +} + +// CHECK-RV64-LABEL: @v_ucras16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.cras16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +uint16x4_t v_ucras16(uint16x4_t a, uint16x4_t b) { + return __rv__v_ucras16(a, b); +} + +// CHECK-RV64-LABEL: @v_scras16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.cras16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int16x4_t v_scras16(int16x4_t a, int16x4_t b) { + return __rv__v_scras16(a, b); +} + +// CHECK-RV64-LABEL: @crsa16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.crsa16.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long crsa16(unsigned long a, unsigned long b) { + return __rv__crsa16(a, b); +} + +// CHECK-RV64-LABEL: @v_ucrsa16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.crsa16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +uint16x4_t v_ucrsa16(uint16x4_t a, uint16x4_t b) { + return __rv__v_ucrsa16(a, b); +} + +// CHECK-RV64-LABEL: @v_scrsa16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.crsa16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int16x4_t v_scrsa16(int16x4_t a, int16x4_t b) { + return __rv__v_scrsa16(a, b); +} + +// CHECK-RV64-LABEL: @insb( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.insb.i64(i64 [[A:%.*]], i64 [[B:%.*]], i64 5) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long insb(unsigned long a, unsigned long b) { + return __rv__insb(a, b, 5); +} + +// CHECK-RV64-LABEL: @kabs8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kabs8.i64(i64 [[A:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long kabs8(unsigned long a) { + return __rv__kabs8(a); +} + +// CHECK-RV64-LABEL: @v_kabs8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <8 x i8> @llvm.riscv.kabs8.v8i8(<8 x i8> [[TMP0]]) +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +int8x8_t v_kabs8(int8x8_t a) { + return __rv__v_kabs8(a); +} + +// CHECK-RV64-LABEL: @kabs16( +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kabs16.i64(i64 [[A:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long kabs16(unsigned long a) { + return __rv__kabs16(a); +} + +// CHECK-RV64-LABEL: @v_kabs16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <4 x i16> @llvm.riscv.kabs16.v4i16(<4 x i16> [[TMP0]]) +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +int16x4_t v_kabs16(int16x4_t a) { + return __rv__v_kabs16(a); +} + +// CHECK-RV64-LABEL: @kabsw( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kabsw.i64(i64 [[A:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long kabsw(long a) { + return __rv__kabsw(a); +} + +// CHECK-RV64-LABEL: @kadd8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kadd8.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long kadd8(unsigned long a, unsigned long b) { + return __rv__kadd8(a, b); +} + +// CHECK-RV64-LABEL: @v_kadd8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <8 x i8> @llvm.riscv.kadd8.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int8x8_t v_kadd8(int8x8_t a, int8x8_t b) { + return __rv__v_kadd8(a, b); +} + +// CHECK-RV64-LABEL: @kadd16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kadd16.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long kadd16(unsigned long a, unsigned long b) { + return __rv__kadd16(a, b); +} + +// CHECK-RV64-LABEL: @v_kadd16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.kadd16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int16x4_t v_kadd16(int16x4_t a, int16x4_t b) { + return __rv__v_kadd16(a, b); +} + +// CHECK-RV64-LABEL: @kaddh( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[CONV:%.*]] = sext i32 [[A:%.*]] to i64 +// CHECK-RV64-NEXT: [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kaddh.i64(i64 [[CONV]], i64 [[CONV1]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long kaddh(int a, int b) { + return __rv__kaddh(a, b); +} + +// CHECK-RV64-LABEL: @kaddw( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[CONV:%.*]] = sext i32 [[A:%.*]] to i64 +// CHECK-RV64-NEXT: [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kaddw.i64(i64 [[CONV]], i64 [[CONV1]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long kaddw(int a, int b) { + return __rv__kaddw(a, b); +} + +// CHECK-RV64-LABEL: @kcras16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kcras16.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long kcras16(unsigned long a, unsigned long b) { + return 
__rv__kcras16(a, b); +} + +// CHECK-RV64-LABEL: @v_kcras16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.kcras16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int16x4_t v_kcras16(int16x4_t a, int16x4_t b) { + return __rv__v_kcras16(a, b); +} + +// CHECK-RV64-LABEL: @kcrsa16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kcrsa16.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long kcrsa16(unsigned long a, unsigned long b) { + return __rv__kcrsa16(a, b); +} + +// CHECK-RV64-LABEL: @v_kcrsa16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.kcrsa16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int16x4_t v_kcrsa16(int16x4_t a, int16x4_t b) { + return __rv__v_kcrsa16(a, b); +} + +// CHECK-RV64-LABEL: @kdmbb( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[CONV:%.*]] = zext i32 [[A:%.*]] to i64 +// CHECK-RV64-NEXT: [[CONV1:%.*]] = zext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kdmbb.i64.i64(i64 [[CONV]], i64 [[CONV1]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long kdmbb(unsigned int a, unsigned int b) { + return __rv__kdmbb(a, b); +} + +// CHECK-RV64-LABEL: @v_kdmbb( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.riscv.kdmbb.i64.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +long v_kdmbb(int16x4_t a, int16x4_t b) { + return __rv__v_kdmbb(a, b); +} + +// CHECK-RV64-LABEL: @kdmbt( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[CONV:%.*]] = zext i32 [[A:%.*]] to i64 +// CHECK-RV64-NEXT: [[CONV1:%.*]] = zext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kdmbt.i64.i64(i64 [[CONV]], i64 [[CONV1]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long kdmbt(unsigned int a, unsigned int b) { + return __rv__kdmbt(a, b); +} + +// CHECK-RV64-LABEL: @v_kdmbt( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.riscv.kdmbt.i64.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +long v_kdmbt(int16x4_t a, int16x4_t b) { + return __rv__v_kdmbt(a, b); +} + +// CHECK-RV64-LABEL: @kdmtt( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[CONV:%.*]] = zext i32 [[A:%.*]] to i64 +// CHECK-RV64-NEXT: [[CONV1:%.*]] = zext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kdmtt.i64.i64(i64 [[CONV]], i64 [[CONV1]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long kdmtt(unsigned int a, unsigned int b) { + return __rv__kdmtt(a, b); +} + +// 
CHECK-RV64-LABEL: @v_kdmtt( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.riscv.kdmtt.i64.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +long v_kdmtt(int16x4_t a, int16x4_t b) { + return __rv__v_kdmtt(a, b); +} + +// CHECK-RV64-LABEL: @kdmabb( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[CONV:%.*]] = zext i32 [[A:%.*]] to i64 +// CHECK-RV64-NEXT: [[CONV1:%.*]] = zext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kdmabb.i64.i64(i64 [[T:%.*]], i64 [[CONV]], i64 [[CONV1]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long kdmabb(long t, unsigned int a, unsigned int b) { + return __rv__kdmabb(t, a, b); +} + +// CHECK-RV64-LABEL: @v_kdmabb( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.riscv.kdmabb.i64.v4i16(i64 [[T:%.*]], <4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +long v_kdmabb(long t, int16x4_t a, int16x4_t b) { + return __rv__v_kdmabb(t, a, b); +} + +// CHECK-RV64-LABEL: @kdmabt( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[CONV:%.*]] = zext i32 [[A:%.*]] to i64 +// CHECK-RV64-NEXT: [[CONV1:%.*]] = zext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kdmabt.i64.i64(i64 [[T:%.*]], i64 [[CONV]], i64 [[CONV1]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long kdmabt(long t, unsigned int a, unsigned int b) { + return __rv__kdmabt(t, a, b); +} + +// CHECK-RV64-LABEL: @v_kdmabt( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.riscv.kdmabt.i64.v4i16(i64 [[T:%.*]], <4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +long v_kdmabt(long t, int16x4_t a, int16x4_t b) { + return __rv__v_kdmabt(t, a, b); +} + +// CHECK-RV64-LABEL: @kdmatt( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[CONV:%.*]] = zext i32 [[A:%.*]] to i64 +// CHECK-RV64-NEXT: [[CONV1:%.*]] = zext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kdmatt.i64.i64(i64 [[T:%.*]], i64 [[CONV]], i64 [[CONV1]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long kdmatt(long t, unsigned int a, unsigned int b) { + return __rv__kdmatt(t, a, b); +} + +// CHECK-RV64-LABEL: @v_kdmatt( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.riscv.kdmatt.i64.v4i16(i64 [[T:%.*]], <4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +long v_kdmatt(long t, int16x4_t a, int16x4_t b) { + return __rv__v_kdmatt(t, a, b); +} + +// CHECK-RV64-LABEL: @khm8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.khm8.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long khm8(unsigned long a, unsigned long b) { + return __rv__khm8(a, b); +} + +// CHECK-RV64-LABEL: @v_khm8( +// CHECK-RV64-NEXT: 
entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <8 x i8> @llvm.riscv.khm8.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int8x8_t v_khm8(int8x8_t a, int8x8_t b) { + return __rv__v_khm8(a, b); +} + +// CHECK-RV64-LABEL: @khmx8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.khmx8.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long khmx8(unsigned long a, unsigned long b) { + return __rv__khmx8(a, b); +} + +// CHECK-RV64-LABEL: @v_khmx8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <8 x i8> @llvm.riscv.khmx8.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int8x8_t v_khmx8(int8x8_t a, int8x8_t b) { + return __rv__v_khmx8(a, b); +} + +// CHECK-RV64-LABEL: @khm16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.khm16.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long khm16(unsigned long a, unsigned long b) { + return __rv__khm16(a, b); +} + +// CHECK-RV64-LABEL: @v_khm16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.khm16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int16x4_t v_khm16(int16x4_t a, int16x4_t b) { + return __rv__v_khm16(a, b); +} + +// CHECK-RV64-LABEL: @khmx16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.khmx16.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long khmx16(unsigned long a, unsigned long b) { + return __rv__khmx16(a, b); +} + +// CHECK-RV64-LABEL: @v_khmx16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.khmx16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int16x4_t v_khmx16(int16x4_t a, int16x4_t b) { + return __rv__v_khmx16(a, b); +} + +// CHECK-RV64-LABEL: @khmbb( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[CONV:%.*]] = zext i32 [[A:%.*]] to i64 +// CHECK-RV64-NEXT: [[CONV1:%.*]] = zext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.khmbb.i64.i64(i64 [[CONV]], i64 [[CONV1]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long khmbb(unsigned int a, unsigned int b) { + return __rv__khmbb(a, b); +} + +// CHECK-RV64-LABEL: @v_khmbb( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> 
+// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.riscv.khmbb.i64.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +long v_khmbb(int16x4_t a, int16x4_t b) { + return __rv__v_khmbb(a, b); +} + +// CHECK-RV64-LABEL: @khmbt( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[CONV:%.*]] = zext i32 [[A:%.*]] to i64 +// CHECK-RV64-NEXT: [[CONV1:%.*]] = zext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.khmbt.i64.i64(i64 [[CONV]], i64 [[CONV1]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long khmbt(unsigned int a, unsigned int b) { + return __rv__khmbt(a, b); +} + +// CHECK-RV64-LABEL: @v_khmbt( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.riscv.khmbt.i64.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +long v_khmbt(int16x4_t a, int16x4_t b) { + return __rv__v_khmbt(a, b); +} + +// CHECK-RV64-LABEL: @khmtt( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[CONV:%.*]] = zext i32 [[A:%.*]] to i64 +// CHECK-RV64-NEXT: [[CONV1:%.*]] = zext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.khmtt.i64.i64(i64 [[CONV]], i64 [[CONV1]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long khmtt(unsigned int a, unsigned int b) { + return __rv__khmtt(a, b); +} + +// CHECK-RV64-LABEL: @v_khmtt( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.riscv.khmtt.i64.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +long v_khmtt(int16x4_t a, int16x4_t b) { + return __rv__v_khmtt(a, b); +} + +// CHECK-RV64-LABEL: @kmabb( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kmabb.i64.i64(i64 [[T:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long kmabb(long t, unsigned long a, unsigned long b) { + return __rv__kmabb(t, a, b); +} + +// CHECK-RV64-LABEL: @v_kmabb( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[T_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP3:%.*]] = tail call <2 x i32> @llvm.riscv.kmabb.v2i32.v4i16(<2 x i32> [[TMP0]], <4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) +// CHECK-RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP4]] +// +int32x2_t v_kmabb(int32x2_t t, int16x4_t a, int16x4_t b) { + return __rv__v_kmabb(t, a, b); +} + +// CHECK-RV64-LABEL: @kmabt( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kmabt.i64.i64(i64 [[T:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long kmabt(long t, unsigned long a, unsigned long b) { + return __rv__kmabt(t, a, b); +} + +// CHECK-RV64-LABEL: @v_kmabt( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[T_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: 
[[TMP3:%.*]] = tail call <2 x i32> @llvm.riscv.kmabt.v2i32.v4i16(<2 x i32> [[TMP0]], <4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) +// CHECK-RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP4]] +// +int32x2_t v_kmabt(int32x2_t t, int16x4_t a, int16x4_t b) { + return __rv__v_kmabt(t, a, b); +} + +// CHECK-RV64-LABEL: @kmatt( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kmatt.i64.i64(i64 [[T:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long kmatt(long t, unsigned long a, unsigned long b) { + return __rv__kmatt(t, a, b); +} + +// CHECK-RV64-LABEL: @v_kmatt( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[T_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP3:%.*]] = tail call <2 x i32> @llvm.riscv.kmatt.v2i32.v4i16(<2 x i32> [[TMP0]], <4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) +// CHECK-RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP4]] +// +int32x2_t v_kmatt(int32x2_t t, int16x4_t a, int16x4_t b) { + return __rv__v_kmatt(t, a, b); +} + +// CHECK-RV64-LABEL: @kmada( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kmada.i64.i64(i64 [[T:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long kmada(long t, unsigned long a, unsigned long b) { + return __rv__kmada(t, a, b); +} + +// CHECK-RV64-LABEL: @v_kmada( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[T_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP3:%.*]] = tail call <2 x i32> @llvm.riscv.kmada.v2i32.v4i16(<2 x i32> [[TMP0]], <4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) +// CHECK-RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP4]] +// +int32x2_t v_kmada(int32x2_t t, int16x4_t a, int16x4_t b) { + return __rv__v_kmada(t, a, b); +} + +// CHECK-RV64-LABEL: @kmaxda( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kmaxda.i64.i64(i64 [[T:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long kmaxda(long t, unsigned long a, unsigned long b) { + return __rv__kmaxda(t, a, b); +} + +// CHECK-RV64-LABEL: @v_kmaxda( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[T_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP3:%.*]] = tail call <2 x i32> @llvm.riscv.kmaxda.v2i32.v4i16(<2 x i32> [[TMP0]], <4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) +// CHECK-RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP4]] +// +int32x2_t v_kmaxda(int32x2_t t, int16x4_t a, int16x4_t b) { + return __rv__v_kmaxda(t, a, b); +} + +// CHECK-RV64-LABEL: @kmads( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kmads.i64.i64(i64 [[T:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long kmads(long t, unsigned long a, unsigned long b) { + return __rv__kmads(t, a, b); +} + +// 
CHECK-RV64-LABEL: @v_kmads( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[T_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP3:%.*]] = tail call <2 x i32> @llvm.riscv.kmads.v2i32.v4i16(<2 x i32> [[TMP0]], <4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) +// CHECK-RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP4]] +// +int32x2_t v_kmads(int32x2_t t, int16x4_t a, int16x4_t b) { + return __rv__v_kmads(t, a, b); +} + +// CHECK-RV64-LABEL: @kmadrs( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kmadrs.i64.i64(i64 [[T:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long kmadrs(long t, unsigned long a, unsigned long b) { + return __rv__kmadrs(t, a, b); +} + +// CHECK-RV64-LABEL: @v_kmadrs( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[T_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP3:%.*]] = tail call <2 x i32> @llvm.riscv.kmadrs.v2i32.v4i16(<2 x i32> [[TMP0]], <4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) +// CHECK-RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP4]] +// +int32x2_t v_kmadrs(int32x2_t t, int16x4_t a, int16x4_t b) { + return __rv__v_kmadrs(t, a, b); +} + +// CHECK-RV64-LABEL: @kmaxds( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kmaxds.i64.i64(i64 [[T:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long kmaxds(long t, unsigned long a, unsigned long b) { + return __rv__kmaxds(t, a, b); +} + +// CHECK-RV64-LABEL: @v_kmaxds( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[T_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP3:%.*]] = tail call <2 x i32> @llvm.riscv.kmaxds.v2i32.v4i16(<2 x i32> [[TMP0]], <4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) +// CHECK-RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP4]] +// +int32x2_t v_kmaxds(int32x2_t t, int16x4_t a, int16x4_t b) { + return __rv__v_kmaxds(t, a, b); +} + +// CHECK-RV64-LABEL: @kmda( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kmda.i64.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long kmda(unsigned long a, unsigned long b) { + return __rv__kmda(a, b); +} + +// CHECK-RV64-LABEL: @v_kmda( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <2 x i32> @llvm.riscv.kmda.v2i32.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int32x2_t v_kmda(int16x4_t a, int16x4_t b) { + return __rv__v_kmda(a, b); +} + +// CHECK-RV64-LABEL: @kmxda( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kmxda.i64.i64(i64 [[A:%.*]], 
i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long kmxda(unsigned long a, unsigned long b) { + return __rv__kmxda(a, b); +} + +// CHECK-RV64-LABEL: @v_kmxda( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <2 x i32> @llvm.riscv.kmxda.v2i32.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int32x2_t v_kmxda(int16x4_t a, int16x4_t b) { + return __rv__v_kmxda(a, b); +} + +// CHECK-RV64-LABEL: @kmmac( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kmmac.i64(i64 [[T:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long kmmac(long t, long a, long b) { + return __rv__kmmac(t, a, b); +} + +// CHECK-RV64-LABEL: @v_kmmac( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[T_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP3:%.*]] = tail call <2 x i32> @llvm.riscv.kmmac.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]], <2 x i32> [[TMP2]]) +// CHECK-RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP4]] +// +int32x2_t v_kmmac(int32x2_t t, int32x2_t a, int32x2_t b) { + return __rv__v_kmmac(t, a, b); +} + +// CHECK-RV64-LABEL: @kmmac_u( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kmmac.u.i64(i64 [[T:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long kmmac_u(long t, long a, long b) { + return __rv__kmmac_u(t, a, b); +} + +// CHECK-RV64-LABEL: @v_kmmac_u( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[T_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP3:%.*]] = tail call <2 x i32> @llvm.riscv.kmmac.u.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]], <2 x i32> [[TMP2]]) +// CHECK-RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP4]] +// +int32x2_t v_kmmac_u(int32x2_t t, int32x2_t a, int32x2_t b) { + return __rv__v_kmmac_u(t, a, b); +} + +// CHECK-RV64-LABEL: @kmmawb( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kmmawb.i64.i64(i64 [[T:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long kmmawb(long t, unsigned long a, unsigned long b) { + return __rv__kmmawb(t, a, b); +} + +// CHECK-RV64-LABEL: @v_kmmawb( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[T_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP3:%.*]] = tail call <2 x i32> @llvm.riscv.kmmawb.v2i32.v4i16(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]], <4 x i16> [[TMP2]]) +// CHECK-RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP4]] +// +int32x2_t v_kmmawb(int32x2_t t, int32x2_t a, int16x4_t b) { + return __rv__v_kmmawb(t, a, b); +} + +// CHECK-RV64-LABEL: 
@kmmawb_u( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kmmawb.u.i64.i64(i64 [[T:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long kmmawb_u(long t, unsigned long a, unsigned long b) { + return __rv__kmmawb_u(t, a, b); +} + +// CHECK-RV64-LABEL: @v_kmmawb_u( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[T_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP3:%.*]] = tail call <2 x i32> @llvm.riscv.kmmawb.u.v2i32.v4i16(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]], <4 x i16> [[TMP2]]) +// CHECK-RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP4]] +// +int32x2_t v_kmmawb_u(int32x2_t t, int32x2_t a, int16x4_t b) { + return __rv__v_kmmawb_u(t, a, b); +} + +// CHECK-RV64-LABEL: @kmmawb2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kmmawb2.i64.i64(i64 [[T:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long kmmawb2(long t, unsigned long a, unsigned long b) { + return __rv__kmmawb2(t, a, b); +} + +// CHECK-RV64-LABEL: @v_kmmawb2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[T_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP3:%.*]] = tail call <2 x i32> @llvm.riscv.kmmawb2.v2i32.v4i16(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]], <4 x i16> [[TMP2]]) +// CHECK-RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP4]] +// +int32x2_t v_kmmawb2(int32x2_t t, int32x2_t a, int16x4_t b) { + return __rv__v_kmmawb2(t, a, b); +} + +// CHECK-RV64-LABEL: @kmmawb2_u( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kmmawb2.u.i64.i64(i64 [[T:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long kmmawb2_u(long t, unsigned long a, unsigned long b) { + return __rv__kmmawb2_u(t, a, b); +} + +// CHECK-RV64-LABEL: @v_kmmawb2_u( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[T_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP3:%.*]] = tail call <2 x i32> @llvm.riscv.kmmawb2.u.v2i32.v4i16(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]], <4 x i16> [[TMP2]]) +// CHECK-RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP4]] +// +int32x2_t v_kmmawb2_u(int32x2_t t, int32x2_t a, int16x4_t b) { + return __rv__v_kmmawb2_u(t, a, b); +} + +// CHECK-RV64-LABEL: @kmmawt( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kmmawt.i64.i64(i64 [[T:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long kmmawt(long t, unsigned long a, unsigned long b) { + return __rv__kmmawt(t, a, b); +} + +// CHECK-RV64-LABEL: @v_kmmawt( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[T_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// 
CHECK-RV64-NEXT: [[TMP3:%.*]] = tail call <2 x i32> @llvm.riscv.kmmawt.v2i32.v4i16(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]], <4 x i16> [[TMP2]]) +// CHECK-RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP4]] +// +int32x2_t v_kmmawt(int32x2_t t, int32x2_t a, int16x4_t b) { + return __rv__v_kmmawt(t, a, b); +} + +// CHECK-RV64-LABEL: @kmmawt_u( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kmmawt.u.i64.i64(i64 [[T:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long kmmawt_u(long t, unsigned long a, unsigned long b) { + return __rv__kmmawt_u(t, a, b); +} + +// CHECK-RV64-LABEL: @v_kmmawt_u( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[T_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP3:%.*]] = tail call <2 x i32> @llvm.riscv.kmmawt.u.v2i32.v4i16(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]], <4 x i16> [[TMP2]]) +// CHECK-RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP4]] +// +int32x2_t v_kmmawt_u(int32x2_t t, int32x2_t a, int16x4_t b) { + return __rv__v_kmmawt_u(t, a, b); +} + +// CHECK-RV64-LABEL: @kmmawt2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kmmawt2.i64.i64(i64 [[T:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long kmmawt2(long t, unsigned long a, unsigned long b) { + return __rv__kmmawt2(t, a, b); +} + +// CHECK-RV64-LABEL: @v_kmmawt2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[T_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP3:%.*]] = tail call <2 x i32> @llvm.riscv.kmmawt2.v2i32.v4i16(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]], <4 x i16> [[TMP2]]) +// CHECK-RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP4]] +// +int32x2_t v_kmmawt2(int32x2_t t, int32x2_t a, int16x4_t b) { + return __rv__v_kmmawt2(t, a, b); +} + +// CHECK-RV64-LABEL: @kmmawt2_u( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kmmawt2.u.i64.i64(i64 [[T:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long kmmawt2_u(long t, unsigned long a, unsigned long b) { + return __rv__kmmawt2_u(t, a, b); +} + +// CHECK-RV64-LABEL: @v_kmmawt2_u( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[T_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP3:%.*]] = tail call <2 x i32> @llvm.riscv.kmmawt2.u.v2i32.v4i16(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]], <4 x i16> [[TMP2]]) +// CHECK-RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP4]] +// +int32x2_t v_kmmawt2_u(int32x2_t t, int32x2_t a, int16x4_t b) { + return __rv__v_kmmawt2_u(t, a, b); +} + +// CHECK-RV64-LABEL: @kmmsb( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kmmsb.i64(i64 [[T:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long kmmsb(long t, 
long a, long b) { + return __rv__kmmsb(t, a, b); +} + +// CHECK-RV64-LABEL: @v_kmmsb( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[T_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP3:%.*]] = tail call <2 x i32> @llvm.riscv.kmmsb.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]], <2 x i32> [[TMP2]]) +// CHECK-RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP4]] +// +int32x2_t v_kmmsb(int32x2_t t, int32x2_t a, int32x2_t b) { + return __rv__v_kmmsb(t, a, b); +} + +// CHECK-RV64-LABEL: @kmmsb_u( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kmmsb.u.i64(i64 [[T:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long kmmsb_u(long t, long a, long b) { + return __rv__kmmsb_u(t, a, b); +} + +// CHECK-RV64-LABEL: @v_kmmsb_u( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[T_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP3:%.*]] = tail call <2 x i32> @llvm.riscv.kmmsb.u.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]], <2 x i32> [[TMP2]]) +// CHECK-RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP4]] +// +int32x2_t v_kmmsb_u(int32x2_t t, int32x2_t a, int32x2_t b) { + return __rv__v_kmmsb_u(t, a, b); +} + +// CHECK-RV64-LABEL: @kmmwb2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kmmwb2.i64.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long kmmwb2(long a, unsigned long b) { + return __rv__kmmwb2(a, b); +} + +// CHECK-RV64-LABEL: @v_kmmwb2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <2 x i32> @llvm.riscv.kmmwb2.v2i32.v4i16(<2 x i32> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int32x2_t v_kmmwb2(int32x2_t a, int16x4_t b) { + return __rv__v_kmmwb2(a, b); +} + +// CHECK-RV64-LABEL: @kmmwb2_u( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kmmwb2.u.i64.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long kmmwb2_u(long a, unsigned long b) { + return __rv__kmmwb2_u(a, b); +} + +// CHECK-RV64-LABEL: @v_kmmwb2_u( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <2 x i32> @llvm.riscv.kmmwb2.u.v2i32.v4i16(<2 x i32> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int32x2_t v_kmmwb2_u(int32x2_t a, int16x4_t b) { + return __rv__v_kmmwb2_u(a, b); +} + +// CHECK-RV64-LABEL: @kmmwt2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kmmwt2.i64.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long kmmwt2(long a, unsigned long b) 
{ + return __rv__kmmwt2(a, b); +} + +// CHECK-RV64-LABEL: @v_kmmwt2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <2 x i32> @llvm.riscv.kmmwt2.v2i32.v4i16(<2 x i32> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int32x2_t v_kmmwt2(int32x2_t a, int16x4_t b) { + return __rv__v_kmmwt2(a, b); +} + +// CHECK-RV64-LABEL: @kmmwt2_u( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kmmwt2.u.i64.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long kmmwt2_u(long a, unsigned long b) { + return __rv__kmmwt2_u(a, b); +} + +// CHECK-RV64-LABEL: @v_kmmwt2_u( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <2 x i32> @llvm.riscv.kmmwt2.u.v2i32.v4i16(<2 x i32> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int32x2_t v_kmmwt2_u(int32x2_t a, int16x4_t b) { + return __rv__v_kmmwt2_u(a, b); +} + +// CHECK-RV64-LABEL: @kmsda( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kmsda.i64.i64(i64 [[T:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long kmsda(long t, unsigned long a, unsigned long b) { + return __rv__kmsda(t, a, b); +} + +// CHECK-RV64-LABEL: @v_kmsda( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[T_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP3:%.*]] = tail call <2 x i32> @llvm.riscv.kmsda.v2i32.v4i16(<2 x i32> [[TMP0]], <4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) +// CHECK-RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP4]] +// +int32x2_t v_kmsda(int32x2_t t, int16x4_t a, int16x4_t b) { + return __rv__v_kmsda(t, a, b); +} + +// CHECK-RV64-LABEL: @kmsxda( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kmsxda.i64.i64(i64 [[T:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long kmsxda(long t, unsigned long a, unsigned long b) { + return __rv__kmsxda(t, a, b); +} + +// CHECK-RV64-LABEL: @v_kmsxda( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[T_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP3:%.*]] = tail call <2 x i32> @llvm.riscv.kmsxda.v2i32.v4i16(<2 x i32> [[TMP0]], <4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) +// CHECK-RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP4]] +// +int32x2_t v_kmsxda(int32x2_t t, int16x4_t a, int16x4_t b) { + return __rv__v_kmsxda(t, a, b); +} + +// CHECK-RV64-LABEL: @ksllw( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[CONV:%.*]] = zext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.ksllw.i64(i64 [[A:%.*]], 
i64 [[CONV]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long ksllw(long a, unsigned int b) { + return __rv__ksllw(a, b); +} + +// CHECK-RV64-LABEL: @ksll8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[CONV:%.*]] = zext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.ksll8.i64.i64(i64 [[A:%.*]], i64 [[CONV]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long ksll8(unsigned long a, unsigned int b) { + return __rv__ksll8(a, b); +} + +// CHECK-RV64-LABEL: @v_ksll8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[CONV:%.*]] = zext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <8 x i8> @llvm.riscv.ksll8.v8i8.i64(<8 x i8> [[TMP0]], i64 [[CONV]]) +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +int8x8_t v_ksll8(int8x8_t a, unsigned int b) { + return __rv__v_ksll8(a, b); +} + +// CHECK-RV64-LABEL: @ksll16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[CONV:%.*]] = zext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.ksll16.i64.i64(i64 [[A:%.*]], i64 [[CONV]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long ksll16(unsigned long a, unsigned int b) { + return __rv__ksll16(a, b); +} + +// CHECK-RV64-LABEL: @v_ksll16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[CONV:%.*]] = zext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <4 x i16> @llvm.riscv.ksll16.v4i16.i64(<4 x i16> [[TMP0]], i64 [[CONV]]) +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +int16x4_t v_ksll16(int16x4_t a, unsigned int b) { + return __rv__v_ksll16(a, b); +} + +// CHECK-RV64-LABEL: @kslra8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[CONV:%.*]] = sext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kslra8.i64.i64(i64 [[A:%.*]], i64 [[CONV]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long kslra8(unsigned long a, int b) { + return __rv__kslra8(a, b); +} + +// CHECK-RV64-LABEL: @v_kslra8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[CONV:%.*]] = sext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <8 x i8> @llvm.riscv.kslra8.v8i8.i64(<8 x i8> [[TMP0]], i64 [[CONV]]) +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +int8x8_t v_kslra8(int8x8_t a, int b) { + return __rv__v_kslra8(a, b); +} + +// CHECK-RV64-LABEL: @kslra8_u( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[CONV:%.*]] = sext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kslra8.u.i64.i64(i64 [[A:%.*]], i64 [[CONV]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long kslra8_u(unsigned long a, int b) { + return __rv__kslra8_u(a, b); +} + +// CHECK-RV64-LABEL: @v_kslra8_u( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[CONV:%.*]] = sext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <8 x i8> @llvm.riscv.kslra8.u.v8i8.i64(<8 x i8> [[TMP0]], i64 [[CONV]]) +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +int8x8_t v_kslra8_u(int8x8_t a, int b) 
{ + return __rv__v_kslra8_u(a, b); +} + +// CHECK-RV64-LABEL: @kslra16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[CONV:%.*]] = sext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kslra16.i64.i64(i64 [[A:%.*]], i64 [[CONV]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long kslra16(unsigned long a, int b) { + return __rv__kslra16(a, b); +} + +// CHECK-RV64-LABEL: @v_kslra16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[CONV:%.*]] = sext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <4 x i16> @llvm.riscv.kslra16.v4i16.i64(<4 x i16> [[TMP0]], i64 [[CONV]]) +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +int16x4_t v_kslra16(int16x4_t a, int b) { + return __rv__v_kslra16(a, b); +} + +// CHECK-RV64-LABEL: @kslra16_u( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[CONV:%.*]] = sext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kslra16.u.i64.i64(i64 [[A:%.*]], i64 [[CONV]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long kslra16_u(unsigned long a, int b) { + return __rv__kslra16_u(a, b); +} + +// CHECK-RV64-LABEL: @v_kslra16_u( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[CONV:%.*]] = sext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <4 x i16> @llvm.riscv.kslra16.u.v4i16.i64(<4 x i16> [[TMP0]], i64 [[CONV]]) +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +int16x4_t v_kslra16_u(int16x4_t a, int b) { + return __rv__v_kslra16_u(a, b); +} + +// CHECK-RV64-LABEL: @kslraw( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[CONV:%.*]] = sext i32 [[A:%.*]] to i64 +// CHECK-RV64-NEXT: [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kslraw.i64(i64 [[CONV]], i64 [[CONV1]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long kslraw(int a, int b) { + return __rv__kslraw(a, b); +} + +// CHECK-RV64-LABEL: @kslraw_u( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[CONV:%.*]] = sext i32 [[A:%.*]] to i64 +// CHECK-RV64-NEXT: [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kslraw.u.i64(i64 [[CONV]], i64 [[CONV1]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long kslraw_u(int a, int b) { + return __rv__kslraw_u(a, b); +} + +// CHECK-RV64-LABEL: @kstas16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kstas16.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long kstas16(unsigned long a, unsigned long b) { + return __rv__kstas16(a, b); +} + +// CHECK-RV64-LABEL: @v_kstas16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.kstas16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int16x4_t v_kstas16(int16x4_t a, int16x4_t b) { + return __rv__v_kstas16(a, b); +} + +// CHECK-RV64-LABEL: @kstsa16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 
@llvm.riscv.kstsa16.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long kstsa16(unsigned long a, unsigned long b) { + return __rv__kstsa16(a, b); +} + +// CHECK-RV64-LABEL: @v_kstsa16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.kstsa16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int16x4_t v_kstsa16(int16x4_t a, int16x4_t b) { + return __rv__v_kstsa16(a, b); +} + +// CHECK-RV64-LABEL: @ksub8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.ksub8.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long ksub8(unsigned long a, unsigned long b) { + return __rv__ksub8(a, b); +} + +// CHECK-RV64-LABEL: @v_ksub8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <8 x i8> @llvm.riscv.ksub8.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int8x8_t v_ksub8(int8x8_t a, int8x8_t b) { + return __rv__v_ksub8(a, b); +} + +// CHECK-RV64-LABEL: @ksub16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.ksub16.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long ksub16(unsigned long a, unsigned long b) { + return __rv__ksub16(a, b); +} + +// CHECK-RV64-LABEL: @v_ksub16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.ksub16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int16x4_t v_ksub16(int16x4_t a, int16x4_t b) { + return __rv__v_ksub16(a, b); +} + +// CHECK-RV64-LABEL: @ksubh( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[CONV:%.*]] = sext i32 [[A:%.*]] to i64 +// CHECK-RV64-NEXT: [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.ksubh.i64(i64 [[CONV]], i64 [[CONV1]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long ksubh(int a, int b) { + return __rv__ksubh(a, b); +} + +// CHECK-RV64-LABEL: @ksubw( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[CONV:%.*]] = sext i32 [[A:%.*]] to i64 +// CHECK-RV64-NEXT: [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.ksubw.i64(i64 [[CONV]], i64 [[CONV1]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long ksubw(int a, int b) { + return __rv__ksubw(a, b); +} + +// CHECK-RV64-LABEL: @kwmmul( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kwmmul.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long kwmmul(long a, long b) { + return __rv__kwmmul(a, b); +} + +// CHECK-RV64-LABEL: @v_kwmmul( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <2 x i32> 
+// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <2 x i32> @llvm.riscv.kwmmul.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int32x2_t v_kwmmul(int32x2_t a, int32x2_t b) { + return __rv__v_kwmmul(a, b); +} + +// CHECK-RV64-LABEL: @kwmmul_u( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.kwmmul.u.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long kwmmul_u(long a, long b) { + return __rv__kwmmul_u(a, b); +} + +// CHECK-RV64-LABEL: @v_kwmmul_u( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <2 x i32> @llvm.riscv.kwmmul.u.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int32x2_t v_kwmmul_u(int32x2_t a, int32x2_t b) { + return __rv__v_kwmmul_u(a, b); +} + +// CHECK-RV64-LABEL: @maxw( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[CONV:%.*]] = sext i32 [[A:%.*]] to i64 +// CHECK-RV64-NEXT: [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.maxw.i64(i64 [[CONV]], i64 [[CONV1]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long maxw(int a, int b) { + return __rv__maxw(a, b); +} + +// CHECK-RV64-LABEL: @minw( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[CONV:%.*]] = sext i32 [[A:%.*]] to i64 +// CHECK-RV64-NEXT: [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.minw.i64(i64 [[CONV]], i64 [[CONV1]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long minw(int a, int b) { + return __rv__minw(a, b); +} + +// CHECK-RV64-LABEL: @pbsad( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.pbsad.i64.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long pbsad(unsigned long a, unsigned long b) { + return __rv__pbsad(a, b); +} + +// CHECK-RV64-LABEL: @v_pbsad( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.riscv.pbsad.i64.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +unsigned long v_pbsad(uint8x8_t a, uint8x8_t b) { + return __rv__v_pbsad(a, b); +} + +// CHECK-RV64-LABEL: @pbsada( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.pbsada.i64.i64(i64 [[T:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long pbsada(unsigned long t, unsigned long a, unsigned long b) { + return __rv__pbsada(t, a, b); +} + +// CHECK-RV64-LABEL: @v_pbsada( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.riscv.pbsada.i64.v8i8(i64 [[T:%.*]], <8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +unsigned long v_pbsada(unsigned long t, uint8x8_t a, uint8x8_t b) { + return __rv__v_pbsada(t, a, 
b); +} + +// CHECK-RV64-LABEL: @pkbb16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.pkbb16.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long pkbb16(unsigned long a, unsigned long b) { + return __rv__pkbb16(a, b); +} + +// CHECK-RV64-LABEL: @v_pkbb16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.pkbb16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +uint16x4_t v_pkbb16(uint16x4_t a, uint16x4_t b) { + return __rv__v_pkbb16(a, b); +} + +// CHECK-RV64-LABEL: @pkbt16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.pkbt16.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long pkbt16(unsigned long a, unsigned long b) { + return __rv__pkbt16(a, b); +} + +// CHECK-RV64-LABEL: @v_pkbt16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.pkbt16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +uint16x4_t v_pkbt16(uint16x4_t a, uint16x4_t b) { + return __rv__v_pkbt16(a, b); +} + +// CHECK-RV64-LABEL: @pktb16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.pktb16.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long pktb16(unsigned long a, unsigned long b) { + return __rv__pktb16(a, b); +} + +// CHECK-RV64-LABEL: @v_pktb16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.pktb16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +uint16x4_t v_pktb16(uint16x4_t a, uint16x4_t b) { + return __rv__v_pktb16(a, b); +} + +// CHECK-RV64-LABEL: @pktt16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.pktt16.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long pktt16(unsigned long a, unsigned long b) { + return __rv__pktt16(a, b); +} + +// CHECK-RV64-LABEL: @v_pktt16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.pktt16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +uint16x4_t v_pktt16(uint16x4_t a, uint16x4_t b) { + return __rv__v_pktt16(a, b); +} + +// CHECK-RV64-LABEL: @radd8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.radd8.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned 
long radd8(unsigned long a, unsigned long b) { + return __rv__radd8(a, b); +} + +// CHECK-RV64-LABEL: @v_radd8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <8 x i8> @llvm.riscv.radd8.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int8x8_t v_radd8(int8x8_t a, int8x8_t b) { + return __rv__v_radd8(a, b); +} + +// CHECK-RV64-LABEL: @radd16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.radd16.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long radd16(unsigned long a, unsigned long b) { + return __rv__radd16(a, b); +} + +// CHECK-RV64-LABEL: @v_radd16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.radd16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int16x4_t v_radd16(int16x4_t a, int16x4_t b) { + return __rv__v_radd16(a, b); +} + +// CHECK-RV64-LABEL: @raddw( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[CONV:%.*]] = sext i32 [[A:%.*]] to i64 +// CHECK-RV64-NEXT: [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.raddw.i64(i64 [[CONV]], i64 [[CONV1]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long raddw(int a, int b) { + return __rv__raddw(a, b); +} + +// CHECK-RV64-LABEL: @rcras16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.rcras16.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long rcras16(unsigned long a, unsigned long b) { + return __rv__rcras16(a, b); +} + +// CHECK-RV64-LABEL: @v_rcras16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.rcras16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int16x4_t v_rcras16(int16x4_t a, int16x4_t b) { + return __rv__v_rcras16(a, b); +} + +// CHECK-RV64-LABEL: @rcrsa16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.rcrsa16.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long rcrsa16(unsigned long a, unsigned long b) { + return __rv__rcrsa16(a, b); +} + +// CHECK-RV64-LABEL: @v_rcrsa16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.rcrsa16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int16x4_t v_rcrsa16(int16x4_t a, int16x4_t b) { + return __rv__v_rcrsa16(a, b); +} + +// CHECK-RV64-LABEL: @rstas16( +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.rstas16.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long rstas16(unsigned long a, unsigned long b) { + return __rv__rstas16(a, b); +} + +// CHECK-RV64-LABEL: @v_rstas16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.rstas16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int16x4_t v_rstas16(int16x4_t a, int16x4_t b) { + return __rv__v_rstas16(a, b); +} + +// CHECK-RV64-LABEL: @rstsa16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.rstsa16.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long rstsa16(unsigned long a, unsigned long b) { + return __rv__rstsa16(a, b); +} + +// CHECK-RV64-LABEL: @v_rstsa16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.rstsa16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int16x4_t v_rstsa16(int16x4_t a, int16x4_t b) { + return __rv__v_rstsa16(a, b); +} + +// CHECK-RV64-LABEL: @rsub8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.rsub8.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long rsub8(unsigned long a, unsigned long b) { + return __rv__rsub8(a, b); +} + +// CHECK-RV64-LABEL: @v_rsub8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <8 x i8> @llvm.riscv.rsub8.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int8x8_t v_rsub8(int8x8_t a, int8x8_t b) { + return __rv__v_rsub8(a, b); +} + +// CHECK-RV64-LABEL: @rsub16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.rsub16.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long rsub16(unsigned long a, unsigned long b) { + return __rv__rsub16(a, b); +} + +// CHECK-RV64-LABEL: @v_rsub16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.rsub16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int16x4_t v_rsub16(int16x4_t a, int16x4_t b) { + return __rv__v_rsub16(a, b); +} + +// CHECK-RV64-LABEL: @rsubw( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[CONV:%.*]] = sext i32 [[A:%.*]] to i64 +// CHECK-RV64-NEXT: [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.rsubw.i64(i64 [[CONV]], i64 [[CONV1]]) +// 
CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long rsubw(int a, int b) { + return __rv__rsubw(a, b); +} + +// CHECK-RV64-LABEL: @sclip8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.sclip8.i64.i64(i64 [[A:%.*]], i64 7) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long sclip8(unsigned long a) { + return __rv__sclip8(a, 7); +} + +// CHECK-RV64-LABEL: @v_sclip8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <8 x i8> @llvm.riscv.sclip8.v8i8.i64(<8 x i8> [[TMP0]], i64 7) +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +int8x8_t v_sclip8(int8x8_t a) { + return __rv__v_sclip8(a, 7); +} + +// CHECK-RV64-LABEL: @sclip16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.sclip16.i64.i64(i64 [[A:%.*]], i64 8) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long sclip16(unsigned long a) { + return __rv__sclip16(a, 8); +} + +// CHECK-RV64-LABEL: @v_sclip16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <4 x i16> @llvm.riscv.sclip16.v4i16.i64(<4 x i16> [[TMP0]], i64 8) +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +int16x4_t v_sclip16(int16x4_t a) { + return __rv__v_sclip16(a, 8); +} + +// CHECK-RV64-LABEL: @sclip32( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.sclip32.i64.i64(i64 [[A:%.*]], i64 9) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long sclip32(long a) { + return __rv__sclip32(a, 9); +} + +// CHECK-RV64-LABEL: @v_sclip32( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <2 x i32> @llvm.riscv.sclip32.v2i32.i64(<2 x i32> [[TMP0]], i64 9) +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +int32x2_t v_sclip32(int32x2_t a) { + return __rv__v_sclip32(a, 9); +} + +// CHECK-RV64-LABEL: @scmple8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.scmple8.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long scmple8(unsigned long a, unsigned long b) { + return __rv__scmple8(a, b); +} + +// CHECK-RV64-LABEL: @v_scmple8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <8 x i8> @llvm.riscv.scmple8.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +uint8x8_t v_scmple8(int8x8_t a, int8x8_t b) { + return __rv__v_scmple8(a, b); +} + +// CHECK-RV64-LABEL: @scmple16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.scmple16.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long scmple16(unsigned long a, unsigned long b) { + return __rv__scmple16(a, b); +} + +// CHECK-RV64-LABEL: @v_scmple16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 
[[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.scmple16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +uint16x4_t v_scmple16(int16x4_t a, int16x4_t b) { + return __rv__v_scmple16(a, b); +} + +// CHECK-RV64-LABEL: @scmplt8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.scmplt8.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long scmplt8(unsigned long a, unsigned long b) { + return __rv__scmplt8(a, b); +} + +// CHECK-RV64-LABEL: @v_scmplt8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <8 x i8> @llvm.riscv.scmplt8.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +uint8x8_t v_scmplt8(int8x8_t a, int8x8_t b) { + return __rv__v_scmplt8(a, b); +} + +// CHECK-RV64-LABEL: @scmplt16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.scmplt16.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long scmplt16(unsigned long a, unsigned long b) { + return __rv__scmplt16(a, b); +} + +// CHECK-RV64-LABEL: @v_scmplt16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.scmplt16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +uint16x4_t v_scmplt16(int16x4_t a, int16x4_t b) { + return __rv__v_scmplt16(a, b); +} + +// CHECK-RV64-LABEL: @sll8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[CONV:%.*]] = zext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.sll8.i64.i64(i64 [[A:%.*]], i64 [[CONV]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long sll8(unsigned long a, unsigned int b) { + return __rv__sll8(a, b); +} + +// CHECK-RV64-LABEL: @v_sll8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[CONV:%.*]] = zext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <8 x i8> @llvm.riscv.sll8.v8i8.i64(<8 x i8> [[TMP0]], i64 [[CONV]]) +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +uint8x8_t v_sll8(uint8x8_t a, unsigned int b) { + return __rv__v_sll8(a, b); +} + +// CHECK-RV64-LABEL: @sll16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[CONV:%.*]] = zext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.sll16.i64.i64(i64 [[A:%.*]], i64 [[CONV]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long sll16(unsigned long a, unsigned int b) { + return __rv__sll16(a, b); +} + +// CHECK-RV64-LABEL: @v_sll16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[CONV:%.*]] = zext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <4 x i16> @llvm.riscv.sll16.v4i16.i64(<4 x i16> [[TMP0]], i64 
[[CONV]]) +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +uint16x4_t v_sll16(uint16x4_t a, unsigned int b) { + return __rv__v_sll16(a, b); +} + +// CHECK-RV64-LABEL: @smaqa( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.smaqa.i64.i64(i64 [[T:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long smaqa(long t, unsigned long a, unsigned long b) { + return __rv__smaqa(t, a, b); +} + +// CHECK-RV64-LABEL: @v_smaqa( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[T_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP3:%.*]] = tail call <2 x i32> @llvm.riscv.smaqa.v2i32.v8i8(<2 x i32> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) +// CHECK-RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP4]] +// +int32x2_t v_smaqa(int32x2_t t, int8x8_t a, int8x8_t b) { + return __rv__v_smaqa(t, a, b); +} + +// CHECK-RV64-LABEL: @smaqa_su( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.smaqa.su.i64.i64(i64 [[T:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long smaqa_su(long t, unsigned long a, unsigned long b) { + return __rv__smaqa_su(t, a, b); +} + +// CHECK-RV64-LABEL: @v_smaqa_su( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[T_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP3:%.*]] = tail call <2 x i32> @llvm.riscv.smaqa.su.v2i32.v8i8(<2 x i32> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) +// CHECK-RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP4]] +// +int32x2_t v_smaqa_su(int32x2_t t, int8x8_t a, int8x8_t b) { + return __rv__v_smaqa_su(t, a, b); +} + +// CHECK-RV64-LABEL: @smax8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.smax8.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long smax8(unsigned long a, unsigned long b) { + return __rv__smax8(a, b); +} + +// CHECK-RV64-LABEL: @v_smax8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <8 x i8> @llvm.riscv.smax8.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int8x8_t v_smax8(int8x8_t a, int8x8_t b) { + return __rv__v_smax8(a, b); +} + +// CHECK-RV64-LABEL: @smax16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.smax16.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long smax16(unsigned long a, unsigned long b) { + return __rv__smax16(a, b); +} + +// CHECK-RV64-LABEL: @v_smax16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.smax16.v4i16(<4 
x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int16x4_t v_smax16(int16x4_t a, int16x4_t b) { + return __rv__v_smax16(a, b); +} + +// CHECK-RV64-LABEL: @smbb16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.smbb16.i64.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long smbb16(unsigned long a, unsigned long b) { + return __rv__smbb16(a, b); +} + +// CHECK-RV64-LABEL: @v_smbb16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <2 x i32> @llvm.riscv.smbb16.v2i32.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int32x2_t v_smbb16(int16x4_t a, int16x4_t b) { + return __rv__v_smbb16(a, b); +} + +// CHECK-RV64-LABEL: @smbt16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.smbt16.i64.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long smbt16(unsigned long a, unsigned long b) { + return __rv__smbt16(a, b); +} + +// CHECK-RV64-LABEL: @v_smbt16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <2 x i32> @llvm.riscv.smbt16.v2i32.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int32x2_t v_smbt16(int16x4_t a, int16x4_t b) { + return __rv__v_smbt16(a, b); +} + +// CHECK-RV64-LABEL: @smtt16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.smtt16.i64.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long smtt16(unsigned long a, unsigned long b) { + return __rv__smtt16(a, b); +} + +// CHECK-RV64-LABEL: @v_smtt16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <2 x i32> @llvm.riscv.smtt16.v2i32.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int32x2_t v_smtt16(int16x4_t a, int16x4_t b) { + return __rv__v_smtt16(a, b); +} + +// CHECK-RV64-LABEL: @smds( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.smds.i64.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long smds(unsigned long a, unsigned long b) { + return __rv__smds(a, b); +} + +// CHECK-RV64-LABEL: @v_smds( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <2 x i32> @llvm.riscv.smds.v2i32.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int32x2_t v_smds(int16x4_t a, int16x4_t b) { + return __rv__v_smds(a, b); +} + +// 
CHECK-RV64-LABEL: @smdrs( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.smdrs.i64.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long smdrs(unsigned long a, unsigned long b) { + return __rv__smdrs(a, b); +} + +// CHECK-RV64-LABEL: @v_smdrs( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <2 x i32> @llvm.riscv.smdrs.v2i32.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int32x2_t v_smdrs(int16x4_t a, int16x4_t b) { + return __rv__v_smdrs(a, b); +} + +// CHECK-RV64-LABEL: @smxds( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.smxds.i64.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long smxds(unsigned long a, unsigned long b) { + return __rv__smxds(a, b); +} + +// CHECK-RV64-LABEL: @v_smxds( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <2 x i32> @llvm.riscv.smxds.v2i32.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int32x2_t v_smxds(int16x4_t a, int16x4_t b) { + return __rv__v_smxds(a, b); +} + +// CHECK-RV64-LABEL: @smin8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.smin8.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long smin8(unsigned long a, unsigned long b) { + return __rv__smin8(a, b); +} + +// CHECK-RV64-LABEL: @v_smin8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <8 x i8> @llvm.riscv.smin8.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int8x8_t v_smin8(int8x8_t a, int8x8_t b) { + return __rv__v_smin8(a, b); +} + +// CHECK-RV64-LABEL: @smin16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.smin16.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long smin16(unsigned long a, unsigned long b) { + return __rv__smin16(a, b); +} + +// CHECK-RV64-LABEL: @v_smin16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.smin16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int16x4_t v_smin16(int16x4_t a, int16x4_t b) { + return __rv__v_smin16(a, b); +} + +// CHECK-RV64-LABEL: @smmul( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.smmul.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long smmul(long a, long b) { + return __rv__smmul(a, b); +} + +// 
CHECK-RV64-LABEL: @v_smmul( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <2 x i32> @llvm.riscv.smmul.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int32x2_t v_smmul(int32x2_t a, int32x2_t b) { + return __rv__v_smmul(a, b); +} + +// CHECK-RV64-LABEL: @smmul_u( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.smmul.u.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long smmul_u(long a, long b) { + return __rv__smmul_u(a, b); +} + +// CHECK-RV64-LABEL: @v_smmul_u( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <2 x i32> @llvm.riscv.smmul.u.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int32x2_t v_smmul_u(int32x2_t a, int32x2_t b) { + return __rv__v_smmul_u(a, b); +} + +// CHECK-RV64-LABEL: @smmwb( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.smmwb.i64.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long smmwb(long a, long b) { + return __rv__smmwb(a, b); +} + +// CHECK-RV64-LABEL: @v_smmwb( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <2 x i32> @llvm.riscv.smmwb.v2i32.v4i16(<2 x i32> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int32x2_t v_smmwb(int32x2_t a, int16x4_t b) { + return __rv__v_smmwb(a, b); +} + +// CHECK-RV64-LABEL: @smmwb_u( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.smmwb.u.i64.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long smmwb_u(long a, long b) { + return __rv__smmwb_u(a, b); +} + +// CHECK-RV64-LABEL: @v_smmwb_u( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <2 x i32> @llvm.riscv.smmwb.u.v2i32.v4i16(<2 x i32> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int32x2_t v_smmwb_u(int32x2_t a, int16x4_t b) { + return __rv__v_smmwb_u(a, b); +} + +// CHECK-RV64-LABEL: @smmwt( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.smmwt.i64.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long smmwt(long a, long b) { + return __rv__smmwt(a, b); +} + +// CHECK-RV64-LABEL: @v_smmwt( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <2 x i32> @llvm.riscv.smmwt.v2i32.v4i16(<2 x i32> [[TMP0]], 
<4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int32x2_t v_smmwt(int32x2_t a, int16x4_t b) { + return __rv__v_smmwt(a, b); +} + +// CHECK-RV64-LABEL: @smmwt_u( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.smmwt.u.i64.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long smmwt_u(long a, long b) { + return __rv__smmwt_u(a, b); +} + +// CHECK-RV64-LABEL: @v_smmwt_u( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <2 x i32> @llvm.riscv.smmwt.u.v2i32.v4i16(<2 x i32> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int32x2_t v_smmwt_u(int32x2_t a, int16x4_t b) { + return __rv__v_smmwt_u(a, b); +} + +// CHECK-RV64-LABEL: @sra_u( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[CONV:%.*]] = zext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.sra.u.i64.i64(i64 [[A:%.*]], i64 [[CONV]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long sra_u(long a, unsigned int b) { + return __rv__sra_u(a, b); +} + +// CHECK-RV64-LABEL: @sra8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[CONV:%.*]] = zext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.sra8.i64.i64(i64 [[A:%.*]], i64 [[CONV]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long sra8(unsigned long a, unsigned int b) { + return __rv__sra8(a, b); +} + +// CHECK-RV64-LABEL: @v_sra8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[CONV:%.*]] = zext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <8 x i8> @llvm.riscv.sra8.v8i8.i64(<8 x i8> [[TMP0]], i64 [[CONV]]) +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +int8x8_t v_sra8(int8x8_t a, unsigned int b) { + return __rv__v_sra8(a, b); +} + +// CHECK-RV64-LABEL: @sra8_u( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[CONV:%.*]] = zext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.sra8.u.i64.i64(i64 [[A:%.*]], i64 [[CONV]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long sra8_u(unsigned long a, unsigned int b) { + return __rv__sra8_u(a, b); +} + +// CHECK-RV64-LABEL: @v_sra8_u( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[CONV:%.*]] = zext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <8 x i8> @llvm.riscv.sra8.u.v8i8.i64(<8 x i8> [[TMP0]], i64 [[CONV]]) +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +int8x8_t v_sra8_u(int8x8_t a, unsigned int b) { + return __rv__v_sra8_u(a, b); +} + +// CHECK-RV64-LABEL: @sra16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[CONV:%.*]] = zext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.sra16.i64.i64(i64 [[A:%.*]], i64 [[CONV]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long sra16(unsigned long a, unsigned int b) { + return __rv__sra16(a, b); +} + +// CHECK-RV64-LABEL: @v_sra16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = 
bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[CONV:%.*]] = zext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <4 x i16> @llvm.riscv.sra16.v4i16.i64(<4 x i16> [[TMP0]], i64 [[CONV]]) +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +int16x4_t v_sra16(int16x4_t a, unsigned int b) { + return __rv__v_sra16(a, b); +} + +// CHECK-RV64-LABEL: @sra16_u( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[CONV:%.*]] = zext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.sra16.u.i64.i64(i64 [[A:%.*]], i64 [[CONV]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long sra16_u(unsigned long a, unsigned int b) { + return __rv__sra16_u(a, b); +} + +// CHECK-RV64-LABEL: @v_sra16_u( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[CONV:%.*]] = zext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <4 x i16> @llvm.riscv.sra16.u.v4i16.i64(<4 x i16> [[TMP0]], i64 [[CONV]]) +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +int16x4_t v_sra16_u(int16x4_t a, unsigned int b) { + return __rv__v_sra16_u(a, b); +} + +// CHECK-RV64-LABEL: @srl8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[CONV:%.*]] = zext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.srl8.i64.i64(i64 [[A:%.*]], i64 [[CONV]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long srl8(unsigned long a, unsigned int b) { + return __rv__srl8(a, b); +} + +// CHECK-RV64-LABEL: @v_srl8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[CONV:%.*]] = zext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <8 x i8> @llvm.riscv.srl8.v8i8.i64(<8 x i8> [[TMP0]], i64 [[CONV]]) +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +int8x8_t v_srl8(int8x8_t a, unsigned int b) { + return __rv__v_srl8(a, b); +} + +// CHECK-RV64-LABEL: @srl8_u( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[CONV:%.*]] = zext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.srl8.u.i64.i64(i64 [[A:%.*]], i64 [[CONV]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long srl8_u(unsigned long a, unsigned int b) { + return __rv__srl8_u(a, b); +} + +// CHECK-RV64-LABEL: @v_srl8_u( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[CONV:%.*]] = zext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <8 x i8> @llvm.riscv.srl8.u.v8i8.i64(<8 x i8> [[TMP0]], i64 [[CONV]]) +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +int8x8_t v_srl8_u(int8x8_t a, unsigned int b) { + return __rv__v_srl8_u(a, b); +} + +// CHECK-RV64-LABEL: @srl16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[CONV:%.*]] = zext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.srl16.i64.i64(i64 [[A:%.*]], i64 [[CONV]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long srl16(unsigned long a, unsigned int b) { + return __rv__srl16(a, b); +} + +// CHECK-RV64-LABEL: @v_srl16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: 
[[CONV:%.*]] = zext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <4 x i16> @llvm.riscv.srl16.v4i16.i64(<4 x i16> [[TMP0]], i64 [[CONV]]) +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +int16x4_t v_srl16(int16x4_t a, unsigned int b) { + return __rv__v_srl16(a, b); +} + +// CHECK-RV64-LABEL: @srl16_u( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[CONV:%.*]] = zext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.srl16.u.i64.i64(i64 [[A:%.*]], i64 [[CONV]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long srl16_u(unsigned long a, unsigned int b) { + return __rv__srl16_u(a, b); +} + +// CHECK-RV64-LABEL: @v_srl16_u( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[CONV:%.*]] = zext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <4 x i16> @llvm.riscv.srl16.u.v4i16.i64(<4 x i16> [[TMP0]], i64 [[CONV]]) +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +int16x4_t v_srl16_u(int16x4_t a, unsigned int b) { + return __rv__v_srl16_u(a, b); +} + +// CHECK-RV64-LABEL: @stas16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.stas16.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long stas16(unsigned long a, unsigned long b) { + return __rv__stas16(a, b); +} + +// CHECK-RV64-LABEL: @v_ustas16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.stas16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +uint16x4_t v_ustas16(uint16x4_t a, uint16x4_t b) { + return __rv__v_ustas16(a, b); +} + +// CHECK-RV64-LABEL: @v_sstas16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.stas16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int16x4_t v_sstas16(int16x4_t a, int16x4_t b) { + return __rv__v_sstas16(a, b); +} + +// CHECK-RV64-LABEL: @stsa16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.stsa16.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long stsa16(unsigned long a, unsigned long b) { + return __rv__stsa16(a, b); +} + +// CHECK-RV64-LABEL: @v_ustsa16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.stsa16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +uint16x4_t v_ustsa16(uint16x4_t a, uint16x4_t b) { + return __rv__v_ustsa16(a, b); +} + +// CHECK-RV64-LABEL: @v_sstsa16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = 
bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.stsa16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int16x4_t v_sstsa16(int16x4_t a, int16x4_t b) { + return __rv__v_sstsa16(a, b); +} + +// CHECK-RV64-LABEL: @sub8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.sub8.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long sub8(unsigned long a, unsigned long b) { + return __rv__sub8(a, b); +} + +// CHECK-RV64-LABEL: @v_usub8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <8 x i8> @llvm.riscv.sub8.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +uint8x8_t v_usub8(uint8x8_t a, uint8x8_t b) { + return __rv__v_usub8(a, b); +} + +// CHECK-RV64-LABEL: @v_ssub8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <8 x i8> @llvm.riscv.sub8.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int8x8_t v_ssub8(int8x8_t a, int8x8_t b) { + return __rv__v_ssub8(a, b); +} + +// CHECK-RV64-LABEL: @sub16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.sub16.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long sub16(unsigned long a, unsigned long b) { + return __rv__sub16(a, b); +} + +// CHECK-RV64-LABEL: @v_usub16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.sub16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +uint16x4_t v_usub16(uint16x4_t a, uint16x4_t b) { + return __rv__v_usub16(a, b); +} + +// CHECK-RV64-LABEL: @v_ssub16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.sub16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +int16x4_t v_ssub16(int16x4_t a, int16x4_t b) { + return __rv__v_ssub16(a, b); +} + +// CHECK-RV64-LABEL: @sunpkd810( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.sunpkd810.i64(i64 [[A:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long sunpkd810(unsigned long a) { + return __rv__sunpkd810(a); +} + +// CHECK-RV64-LABEL: @v_sunpkd810( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// 
CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <4 x i16> @llvm.riscv.v.sunpkd810.v4i16(<8 x i8> [[TMP0]]) +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +int16x4_t v_sunpkd810(int8x8_t a) { + return __rv__v_sunpkd810(a); +} + +// CHECK-RV64-LABEL: @sunpkd820( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.sunpkd820.i64(i64 [[A:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long sunpkd820(unsigned long a) { + return __rv__sunpkd820(a); +} + +// CHECK-RV64-LABEL: @v_sunpkd820( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <4 x i16> @llvm.riscv.v.sunpkd820.v4i16(<8 x i8> [[TMP0]]) +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +int16x4_t v_sunpkd820(int8x8_t a) { + return __rv__v_sunpkd820(a); +} + +// CHECK-RV64-LABEL: @sunpkd830( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.sunpkd830.i64(i64 [[A:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long sunpkd830(unsigned long a) { + return __rv__sunpkd830(a); +} + +// CHECK-RV64-LABEL: @v_sunpkd830( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <4 x i16> @llvm.riscv.v.sunpkd830.v4i16(<8 x i8> [[TMP0]]) +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +int16x4_t v_sunpkd830(int8x8_t a) { + return __rv__v_sunpkd830(a); +} + +// CHECK-RV64-LABEL: @sunpkd831( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.sunpkd831.i64(i64 [[A:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long sunpkd831(unsigned long a) { + return __rv__sunpkd831(a); +} + +// CHECK-RV64-LABEL: @v_sunpkd831( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <4 x i16> @llvm.riscv.v.sunpkd831.v4i16(<8 x i8> [[TMP0]]) +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +int16x4_t v_sunpkd831(int8x8_t a) { + return __rv__v_sunpkd831(a); +} + +// CHECK-RV64-LABEL: @sunpkd832( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.sunpkd832.i64(i64 [[A:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long sunpkd832(unsigned long a) { + return __rv__sunpkd832(a); +} + +// CHECK-RV64-LABEL: @v_sunpkd832( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <4 x i16> @llvm.riscv.v.sunpkd832.v4i16(<8 x i8> [[TMP0]]) +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +int16x4_t v_sunpkd832(int8x8_t a) { + return __rv__v_sunpkd832(a); +} + +// CHECK-RV64-LABEL: @swap8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.swap8.i64(i64 [[A:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long swap8(unsigned long a) { + return __rv__swap8(a); +} + +// CHECK-RV64-LABEL: @v_swap8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <8 x i8> 
@llvm.riscv.swap8.v8i8(<8 x i8> [[TMP0]]) +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +uint8x8_t v_swap8(uint8x8_t a) { + return __rv__v_swap8(a); +} + +// CHECK-RV64-LABEL: @swap16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.swap16.i64(i64 [[A:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long swap16(unsigned long a) { + return __rv__swap16(a); +} + +// CHECK-RV64-LABEL: @v_swap16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <4 x i16> @llvm.riscv.swap16.v4i16(<4 x i16> [[TMP0]]) +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +uint16x4_t v_swap16(uint16x4_t a) { + return __rv__v_swap16(a); +} + +// CHECK-RV64-LABEL: @uclip8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.uclip8.i64.i64(i64 [[A:%.*]], i64 7) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long uclip8(unsigned long a) { + return __rv__uclip8(a, 7); +} + +// CHECK-RV64-LABEL: @v_uclip8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <8 x i8> @llvm.riscv.uclip8.v8i8.i64(<8 x i8> [[TMP0]], i64 7) +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +int8x8_t v_uclip8(int8x8_t a) { + return __rv__v_uclip8(a, 7); +} + +// CHECK-RV64-LABEL: @uclip16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.uclip16.i64.i64(i64 [[A:%.*]], i64 8) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long uclip16(unsigned long a) { + return __rv__uclip16(a, 8); +} + +// CHECK-RV64-LABEL: @v_uclip16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <4 x i16> @llvm.riscv.uclip16.v4i16.i64(<4 x i16> [[TMP0]], i64 8) +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +int16x4_t v_uclip16(int16x4_t a) { + return __rv__v_uclip16(a, 8); +} + +// CHECK-RV64-LABEL: @uclip32( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.uclip32.i64.i64(i64 [[A:%.*]], i64 9) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long uclip32(long a) { + return __rv__uclip32(a, 9); +} + +// CHECK-RV64-LABEL: @v_uclip32( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <2 x i32> @llvm.riscv.uclip32.v2i32.i64(<2 x i32> [[TMP0]], i64 9) +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +int32x2_t v_uclip32(int32x2_t a) { + return __rv__v_uclip32(a, 9); +} + +// CHECK-RV64-LABEL: @ucmple8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.ucmple8.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long ucmple8(unsigned long a, unsigned long b) { + return __rv__ucmple8(a, b); +} + +// CHECK-RV64-LABEL: @v_ucmple8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <8 x i8> +// 
CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <8 x i8> @llvm.riscv.ucmple8.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +uint8x8_t v_ucmple8(int8x8_t a, int8x8_t b) { + return __rv__v_ucmple8(a, b); +} + +// CHECK-RV64-LABEL: @ucmple16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.ucmple16.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long ucmple16(unsigned long a, unsigned long b) { + return __rv__ucmple16(a, b); +} + +// CHECK-RV64-LABEL: @v_ucmple16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.ucmple16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +uint16x4_t v_ucmple16(int16x4_t a, int16x4_t b) { + return __rv__v_ucmple16(a, b); +} + +// CHECK-RV64-LABEL: @ucmplt8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.ucmplt8.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long ucmplt8(unsigned long a, unsigned long b) { + return __rv__ucmplt8(a, b); +} + +// CHECK-RV64-LABEL: @v_ucmplt8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <8 x i8> @llvm.riscv.ucmplt8.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +uint8x8_t v_ucmplt8(int8x8_t a, int8x8_t b) { + return __rv__v_ucmplt8(a, b); +} + +// CHECK-RV64-LABEL: @ucmplt16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.ucmplt16.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long ucmplt16(unsigned long a, unsigned long b) { + return __rv__ucmplt16(a, b); +} + +// CHECK-RV64-LABEL: @v_ucmplt16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.ucmplt16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +uint16x4_t v_ucmplt16(int16x4_t a, int16x4_t b) { + return __rv__v_ucmplt16(a, b); +} + +// CHECK-RV64-LABEL: @ukadd8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.ukadd8.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long ukadd8(unsigned long a, unsigned long b) { + return __rv__ukadd8(a, b); +} + +// CHECK-RV64-LABEL: @v_ukadd8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <8 x i8> @llvm.riscv.ukadd8.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret 
i64 [[TMP3]] +// +uint8x8_t v_ukadd8(uint8x8_t a, uint8x8_t b) { + return __rv__v_ukadd8(a, b); +} + +// CHECK-RV64-LABEL: @ukadd16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.ukadd16.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long ukadd16(unsigned long a, unsigned long b) { + return __rv__ukadd16(a, b); +} + +// CHECK-RV64-LABEL: @v_ukadd16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.ukadd16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +uint16x4_t v_ukadd16(uint16x4_t a, uint16x4_t b) { + return __rv__v_ukadd16(a, b); +} + +// CHECK-RV64-LABEL: @ukaddh( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[CONV:%.*]] = sext i32 [[A:%.*]] to i64 +// CHECK-RV64-NEXT: [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.ukaddh.i64(i64 [[CONV]], i64 [[CONV1]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long ukaddh(int a, int b) { + return __rv__ukaddh(a, b); +} + +// CHECK-RV64-LABEL: @ukaddw( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[CONV:%.*]] = sext i32 [[A:%.*]] to i64 +// CHECK-RV64-NEXT: [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.ukaddw.i64(i64 [[CONV]], i64 [[CONV1]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +long ukaddw(int a, int b) { + return __rv__ukaddw(a, b); +} + +// CHECK-RV64-LABEL: @ukcras16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.ukcras16.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long ukcras16(unsigned long a, unsigned long b) { + return __rv__ukcras16(a, b); +} + +// CHECK-RV64-LABEL: @v_ukcras16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.ukcras16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +uint16x4_t v_ukcras16(uint16x4_t a, uint16x4_t b) { + return __rv__v_ukcras16(a, b); +} + +// CHECK-RV64-LABEL: @ukcrsa16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.ukcrsa16.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long ukcrsa16(unsigned long a, unsigned long b) { + return __rv__ukcrsa16(a, b); +} + +// CHECK-RV64-LABEL: @v_ukcrsa16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.ukcrsa16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +uint16x4_t v_ukcrsa16(uint16x4_t a, uint16x4_t b) { + return __rv__v_ukcrsa16(a, b); +} + +// CHECK-RV64-LABEL: @ukstas16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 
@llvm.riscv.ukstas16.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long ukstas16(unsigned long a, unsigned long b) { + return __rv__ukstas16(a, b); +} + +// CHECK-RV64-LABEL: @v_ukstas16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.ukstas16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +uint16x4_t v_ukstas16(uint16x4_t a, uint16x4_t b) { + return __rv__v_ukstas16(a, b); +} + +// CHECK-RV64-LABEL: @ukstsa16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.ukstsa16.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long ukstsa16(unsigned long a, unsigned long b) { + return __rv__ukstsa16(a, b); +} + +// CHECK-RV64-LABEL: @v_ukstsa16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.ukstsa16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +uint16x4_t v_ukstsa16(uint16x4_t a, uint16x4_t b) { + return __rv__v_ukstsa16(a, b); +} + +// CHECK-RV64-LABEL: @uksub8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.uksub8.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long uksub8(unsigned long a, unsigned long b) { + return __rv__uksub8(a, b); +} + +// CHECK-RV64-LABEL: @v_uksub8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <8 x i8> @llvm.riscv.uksub8.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +uint8x8_t v_uksub8(uint8x8_t a, uint8x8_t b) { + return __rv__v_uksub8(a, b); +} + +// CHECK-RV64-LABEL: @uksub16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.uksub16.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long uksub16(unsigned long a, unsigned long b) { + return __rv__uksub16(a, b); +} + +// CHECK-RV64-LABEL: @v_uksub16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.uksub16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +uint16x4_t v_uksub16(uint16x4_t a, uint16x4_t b) { + return __rv__v_uksub16(a, b); +} + +// CHECK-RV64-LABEL: @uksubh( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[CONV:%.*]] = zext i32 [[A:%.*]] to i64 +// CHECK-RV64-NEXT: [[CONV1:%.*]] = zext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.uksubh.i64(i64 [[CONV]], i64 [[CONV1]]) +// 
CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long uksubh(unsigned int a, unsigned int b) { + return __rv__uksubh(a, b); +} + +// CHECK-RV64-LABEL: @uksubw( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[CONV:%.*]] = zext i32 [[A:%.*]] to i64 +// CHECK-RV64-NEXT: [[CONV1:%.*]] = zext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.uksubw.i64(i64 [[CONV]], i64 [[CONV1]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long uksubw(unsigned int a, unsigned int b) { + return __rv__uksubw(a, b); +} + +// CHECK-RV64-LABEL: @umaqa( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.umaqa.i64.i64(i64 [[T:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long umaqa(unsigned long t, unsigned long a, unsigned long b) { + return __rv__umaqa(t, a, b); +} + +// CHECK-RV64-LABEL: @v_umaqa( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[T_COERCE:%.*]] to <2 x i32> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP3:%.*]] = tail call <2 x i32> @llvm.riscv.umaqa.v2i32.v8i8(<2 x i32> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) +// CHECK-RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP4]] +// +uint32x2_t v_umaqa(uint32x2_t t, uint8x8_t a, uint8x8_t b) { + return __rv__v_umaqa(t, a, b); +} + +// CHECK-RV64-LABEL: @umax8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.umax8.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long umax8(unsigned long a, unsigned long b) { + return __rv__umax8(a, b); +} + +// CHECK-RV64-LABEL: @v_umax8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <8 x i8> @llvm.riscv.umax8.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +uint8x8_t v_umax8(uint8x8_t a, uint8x8_t b) { + return __rv__v_umax8(a, b); +} + +// CHECK-RV64-LABEL: @umax16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.umax16.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long umax16(unsigned long a, unsigned long b) { + return __rv__umax16(a, b); +} + +// CHECK-RV64-LABEL: @v_umax16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.umax16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +uint16x4_t v_umax16(uint16x4_t a, uint16x4_t b) { + return __rv__v_umax16(a, b); +} + +// CHECK-RV64-LABEL: @umin8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.umin8.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long umin8(unsigned long a, unsigned long b) { + return __rv__umin8(a, b); +} + +// CHECK-RV64-LABEL: @v_umin8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: 
[[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8>
+// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <8 x i8>
+// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <8 x i8> @llvm.riscv.umin8.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64
+// CHECK-RV64-NEXT: ret i64 [[TMP3]]
+//
+uint8x8_t v_umin8(uint8x8_t a, uint8x8_t b) {
+  return __rv__v_umin8(a, b);
+}
+
+// CHECK-RV64-LABEL: @umin16(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.umin16.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+// CHECK-RV64-NEXT: ret i64 [[TMP0]]
+//
+unsigned long umin16(unsigned long a, unsigned long b) {
+  return __rv__umin16(a, b);
+}
+
+// CHECK-RV64-LABEL: @v_umin16(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16>
+// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16>
+// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.umin16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64
+// CHECK-RV64-NEXT: ret i64 [[TMP3]]
+//
+uint16x4_t v_umin16(uint16x4_t a, uint16x4_t b) {
+  return __rv__v_umin16(a, b);
+}
+
+// CHECK-RV64-LABEL: @uradd8(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.uradd8.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+// CHECK-RV64-NEXT: ret i64 [[TMP0]]
+//
+unsigned long uradd8(unsigned long a, unsigned long b) {
+  return __rv__uradd8(a, b);
+}
+
+// CHECK-RV64-LABEL: @v_uradd8(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8>
+// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <8 x i8>
+// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <8 x i8> @llvm.riscv.uradd8.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64
+// CHECK-RV64-NEXT: ret i64 [[TMP3]]
+//
+uint8x8_t v_uradd8(uint8x8_t a, uint8x8_t b) {
+  return __rv__v_uradd8(a, b);
+}
+
+// CHECK-RV64-LABEL: @uradd16(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.uradd16.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+// CHECK-RV64-NEXT: ret i64 [[TMP0]]
+//
+unsigned long uradd16(unsigned long a, unsigned long b) {
+  return __rv__uradd16(a, b);
+}
+
+// CHECK-RV64-LABEL: @v_uradd16(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16>
+// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16>
+// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.uradd16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64
+// CHECK-RV64-NEXT: ret i64 [[TMP3]]
+//
+uint16x4_t v_uradd16(uint16x4_t a, uint16x4_t b) {
+  return __rv__v_uradd16(a, b);
+}
+
+// CHECK-RV64-LABEL: @uraddw(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[CONV:%.*]] = zext i32 [[A:%.*]] to i64
+// CHECK-RV64-NEXT: [[CONV1:%.*]] = zext i32 [[B:%.*]] to i64
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.uraddw.i64(i64 [[CONV]], i64 [[CONV1]])
+// CHECK-RV64-NEXT: ret i64 [[TMP0]]
+//
+unsigned long uraddw(unsigned int a, unsigned int b) {
+  return __rv__uraddw(a, b);
+}
+
+// CHECK-RV64-LABEL: @urcras16(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.urcras16.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+// CHECK-RV64-NEXT: ret i64 [[TMP0]]
+//
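+// Note: uint8x8_t, uint16x4_t and uint32x2_t (and their signed counterparts)
+// are register-sized on RV64 and are passed and returned in a single GPR,
+// which is why each vector builtin above lowers to a bitcast from i64, the
+// intrinsic call, and a bitcast of the result back to i64.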
+unsigned long urcras16(unsigned long a, unsigned long b) { + return __rv__urcras16(a, b); +} + +// CHECK-RV64-LABEL: @v_urcras16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.urcras16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +uint16x4_t v_urcras16(uint16x4_t a, uint16x4_t b) { + return __rv__v_urcras16(a, b); +} + +// CHECK-RV64-LABEL: @urcrsa16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.urcrsa16.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long urcrsa16(unsigned long a, unsigned long b) { + return __rv__urcrsa16(a, b); +} + +// CHECK-RV64-LABEL: @v_urcrsa16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.urcrsa16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +uint16x4_t v_urcrsa16(uint16x4_t a, uint16x4_t b) { + return __rv__v_urcrsa16(a, b); +} + +// CHECK-RV64-LABEL: @urstas16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.urstas16.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long urstas16(unsigned long a, unsigned long b) { + return __rv__urstas16(a, b); +} + +// CHECK-RV64-LABEL: @v_urstas16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.urstas16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +uint16x4_t v_urstas16(uint16x4_t a, uint16x4_t b) { + return __rv__v_urstas16(a, b); +} + +// CHECK-RV64-LABEL: @urstsa16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.urstsa16.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long urstsa16(unsigned long a, unsigned long b) { + return __rv__urstsa16(a, b); +} + +// CHECK-RV64-LABEL: @v_urstsa16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.urstsa16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +uint16x4_t v_urstsa16(uint16x4_t a, uint16x4_t b) { + return __rv__v_urstsa16(a, b); +} + +// CHECK-RV64-LABEL: @ursub8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.ursub8.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long ursub8(unsigned long a, unsigned long b) { + return __rv__ursub8(a, b); +} + +// CHECK-RV64-LABEL: @v_ursub8( +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <8 x i8> @llvm.riscv.ursub8.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +uint8x8_t v_ursub8(uint8x8_t a, uint8x8_t b) { + return __rv__v_ursub8(a, b); +} + +// CHECK-RV64-LABEL: @ursub16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.ursub16.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long ursub16(unsigned long a, unsigned long b) { + return __rv__ursub16(a, b); +} + +// CHECK-RV64-LABEL: @v_ursub16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16> +// CHECK-RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.ursub16.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// CHECK-RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP3]] +// +uint16x4_t v_ursub16(uint16x4_t a, uint16x4_t b) { + return __rv__v_ursub16(a, b); +} + +// CHECK-RV64-LABEL: @ursubw( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[CONV:%.*]] = zext i32 [[A:%.*]] to i64 +// CHECK-RV64-NEXT: [[CONV1:%.*]] = zext i32 [[B:%.*]] to i64 +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.ursubw.i64(i64 [[CONV]], i64 [[CONV1]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long ursubw(unsigned int a, unsigned int b) { + return __rv__ursubw(a, b); +} + +// CHECK-RV64-LABEL: @zunpkd810( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.zunpkd810.i64(i64 [[A:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long zunpkd810(unsigned long a) { + return __rv__zunpkd810(a); +} + +// CHECK-RV64-LABEL: @v_zunpkd810( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <4 x i16> @llvm.riscv.v.zunpkd810.v4i16(<8 x i8> [[TMP0]]) +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +uint16x4_t v_zunpkd810(uint8x8_t a) { + return __rv__v_zunpkd810(a); +} + +// CHECK-RV64-LABEL: @zunpkd820( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.zunpkd820.i64(i64 [[A:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long zunpkd820(unsigned long a) { + return __rv__zunpkd820(a); +} + +// CHECK-RV64-LABEL: @v_zunpkd820( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <4 x i16> @llvm.riscv.v.zunpkd820.v4i16(<8 x i8> [[TMP0]]) +// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to i64 +// CHECK-RV64-NEXT: ret i64 [[TMP2]] +// +uint16x4_t v_zunpkd820(uint8x8_t a) { + return __rv__v_zunpkd820(a); +} + +// CHECK-RV64-LABEL: @zunpkd830( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.zunpkd830.i64(i64 [[A:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long zunpkd830(unsigned long a) { + return __rv__zunpkd830(a); +} + +// CHECK-RV64-LABEL: @v_zunpkd830( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8> +// 
CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <4 x i16> @llvm.riscv.v.zunpkd830.v4i16(<8 x i8> [[TMP0]])
+// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to i64
+// CHECK-RV64-NEXT: ret i64 [[TMP2]]
+//
+uint16x4_t v_zunpkd830(uint8x8_t a) {
+  return __rv__v_zunpkd830(a);
+}
+
+// CHECK-RV64-LABEL: @zunpkd831(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.zunpkd831.i64(i64 [[A:%.*]])
+// CHECK-RV64-NEXT: ret i64 [[TMP0]]
+//
+unsigned long zunpkd831(unsigned long a) {
+  return __rv__zunpkd831(a);
+}
+
+// CHECK-RV64-LABEL: @v_zunpkd831(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8>
+// CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <4 x i16> @llvm.riscv.v.zunpkd831.v4i16(<8 x i8> [[TMP0]])
+// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to i64
+// CHECK-RV64-NEXT: ret i64 [[TMP2]]
+//
+uint16x4_t v_zunpkd831(uint8x8_t a) {
+  return __rv__v_zunpkd831(a);
+}
+
+// CHECK-RV64-LABEL: @zunpkd832(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.riscv.zunpkd832.i64(i64 [[A:%.*]])
+// CHECK-RV64-NEXT: ret i64 [[TMP0]]
+//
+unsigned long zunpkd832(unsigned long a) {
+  return __rv__zunpkd832(a);
+}
+
+// CHECK-RV64-LABEL: @v_zunpkd832(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8>
+// CHECK-RV64-NEXT: [[TMP1:%.*]] = tail call <4 x i16> @llvm.riscv.v.zunpkd832.v4i16(<8 x i8> [[TMP0]])
+// CHECK-RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to i64
+// CHECK-RV64-NEXT: ret i64 [[TMP2]]
+//
+uint16x4_t v_zunpkd832(uint8x8_t a) {
+  return __rv__v_zunpkd832(a);
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -100,6 +100,287 @@
 def int_riscv_crc32c_d : BitManipGPRIntrinsics;
 } // TargetPrefix = "riscv"
+//===----------------------------------------------------------------------===//
+// Packed SIMD
+
+let TargetPrefix = "riscv" in {
+  class RVPUnaryIntrinsics
+    : Intrinsic<[llvm_any_ty],
+                [LLVMMatchType<0>],
+                [IntrNoMem]>;
+
+  multiclass RVPUnaryIntrinsics {
+    def "int_riscv_" # NAME : RVPUnaryIntrinsics;
+  }
+
+  defm clrs8 : RVPUnaryIntrinsics;
+  defm clrs16 : RVPUnaryIntrinsics;
+  defm clrs32 : RVPUnaryIntrinsics;
+  defm clo8 : RVPUnaryIntrinsics;
+  defm clo16 : RVPUnaryIntrinsics;
+  defm clo32 : RVPUnaryIntrinsics;
+  defm clz8 : RVPUnaryIntrinsics;
+  defm clz16 : RVPUnaryIntrinsics;
+  defm clz32 : RVPUnaryIntrinsics;
+  defm kabs8 : RVPUnaryIntrinsics;
+  defm kabs16 : RVPUnaryIntrinsics;
+  defm kabsw : RVPUnaryIntrinsics;
+  defm swap8 : RVPUnaryIntrinsics;
+  defm swap16 : RVPUnaryIntrinsics;
+
+  // Unpacking builtins have an extra intrinsic for the vector type.
+  // The vector intrinsic's operand elements are half the width of the
+  // result's elements, but there are twice as many of them.
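+  // For example (illustrative; the concrete signatures come from the clang
+  // tests above), the RV64 vector form of sunpkd810 is
+  //   <4 x i16> @llvm.riscv.v.sunpkd810.v4i16(<8 x i8>)
+  // where an <8 x i8> operand is unpacked into a <4 x i16> result.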
+ class RVPUnaryVectorPKDIntrinsics + : Intrinsic<[llvm_anyvector_ty], + [LLVMSubdivide2VectorType<0>], + [IntrNoMem]>; + + multiclass RVPUnaryPKDIntrinsics { + def "int_riscv_" # NAME : RVPUnaryIntrinsics; + def "int_riscv_v_" # NAME : RVPUnaryVectorPKDIntrinsics; + } + + defm sunpkd810 : RVPUnaryPKDIntrinsics; + defm sunpkd820 : RVPUnaryPKDIntrinsics; + defm sunpkd830 : RVPUnaryPKDIntrinsics; + defm sunpkd831 : RVPUnaryPKDIntrinsics; + defm sunpkd832 : RVPUnaryPKDIntrinsics; + defm zunpkd810 : RVPUnaryPKDIntrinsics; + defm zunpkd820 : RVPUnaryPKDIntrinsics; + defm zunpkd830 : RVPUnaryPKDIntrinsics; + defm zunpkd831 : RVPUnaryPKDIntrinsics; + defm zunpkd832 : RVPUnaryPKDIntrinsics; + + class RVPBinaryIntrinsics + : Intrinsic<[llvm_any_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem]>; + + multiclass RVPBinaryIntrinsics { + def "int_riscv_" # NAME : RVPBinaryIntrinsics; + } + + defm ave : RVPBinaryIntrinsics; + defm add8 : RVPBinaryIntrinsics; + defm add16 : RVPBinaryIntrinsics; + defm bitrev : RVPBinaryIntrinsics; + defm cmpeq8 : RVPBinaryIntrinsics; + defm cmpeq16 : RVPBinaryIntrinsics; + defm cras16 : RVPBinaryIntrinsics; + defm crsa16 : RVPBinaryIntrinsics; + defm kadd8 : RVPBinaryIntrinsics; + defm kadd16 : RVPBinaryIntrinsics; + defm kaddh : RVPBinaryIntrinsics; + defm kaddw : RVPBinaryIntrinsics; + defm kcras16 : RVPBinaryIntrinsics; + defm kcrsa16 : RVPBinaryIntrinsics; + defm khm8 : RVPBinaryIntrinsics; + defm khmx8 : RVPBinaryIntrinsics; + defm khm16 : RVPBinaryIntrinsics; + defm khmx16 : RVPBinaryIntrinsics; + defm ksllw : RVPBinaryIntrinsics; + defm kslraw : RVPBinaryIntrinsics; + defm kslraw_u : RVPBinaryIntrinsics; + defm kstas16 : RVPBinaryIntrinsics; + defm kstsa16 : RVPBinaryIntrinsics; + defm ksub8 : RVPBinaryIntrinsics; + defm ksub16 : RVPBinaryIntrinsics; + defm ksubh : RVPBinaryIntrinsics; + defm ksubw : RVPBinaryIntrinsics; + defm kwmmul : RVPBinaryIntrinsics; + defm kwmmul_u : RVPBinaryIntrinsics; + defm maxw : RVPBinaryIntrinsics; + defm minw : RVPBinaryIntrinsics; + defm pkbb16 : RVPBinaryIntrinsics; + defm pkbt16 : RVPBinaryIntrinsics; + defm pktt16 : RVPBinaryIntrinsics; + defm pktb16 : RVPBinaryIntrinsics; + defm radd8 : RVPBinaryIntrinsics; + defm radd16 : RVPBinaryIntrinsics; + defm raddw : RVPBinaryIntrinsics; + defm rcras16 : RVPBinaryIntrinsics; + defm rcrsa16 : RVPBinaryIntrinsics; + defm rstas16 : RVPBinaryIntrinsics; + defm rstsa16 : RVPBinaryIntrinsics; + defm rsub8 : RVPBinaryIntrinsics; + defm rsub16 : RVPBinaryIntrinsics; + defm rsubw : RVPBinaryIntrinsics; + defm scmple8 : RVPBinaryIntrinsics; + defm scmple16 : RVPBinaryIntrinsics; + defm scmplt8 : RVPBinaryIntrinsics; + defm scmplt16 : RVPBinaryIntrinsics; + defm smax8 : RVPBinaryIntrinsics; + defm smax16 : RVPBinaryIntrinsics; + defm smin8 : RVPBinaryIntrinsics; + defm smin16 : RVPBinaryIntrinsics; + defm smmul : RVPBinaryIntrinsics; + defm smmul_u : RVPBinaryIntrinsics; + defm stas16 : RVPBinaryIntrinsics; + defm stsa16 : RVPBinaryIntrinsics; + defm sub8 : RVPBinaryIntrinsics; + defm sub16 : RVPBinaryIntrinsics; + defm ucmple8 : RVPBinaryIntrinsics; + defm ucmple16 : RVPBinaryIntrinsics; + defm ucmplt8 : RVPBinaryIntrinsics; + defm ucmplt16 : RVPBinaryIntrinsics; + defm ukadd8 : RVPBinaryIntrinsics; + defm ukadd16 : RVPBinaryIntrinsics; + defm ukaddh : RVPBinaryIntrinsics; + defm ukaddw : RVPBinaryIntrinsics; + defm ukcras16 : RVPBinaryIntrinsics; + defm ukcrsa16 : RVPBinaryIntrinsics; + defm ukstas16 : RVPBinaryIntrinsics; + defm ukstsa16 : RVPBinaryIntrinsics; + defm uksub8 : 
RVPBinaryIntrinsics; + defm uksub16 : RVPBinaryIntrinsics; + defm uksubh : RVPBinaryIntrinsics; + defm uksubw : RVPBinaryIntrinsics; + defm umax8 : RVPBinaryIntrinsics; + defm umax16 : RVPBinaryIntrinsics; + defm umin8 : RVPBinaryIntrinsics; + defm umin16 : RVPBinaryIntrinsics; + defm uradd8 : RVPBinaryIntrinsics; + defm uradd16 : RVPBinaryIntrinsics; + defm uraddw : RVPBinaryIntrinsics; + defm urcras16 : RVPBinaryIntrinsics; + defm urcrsa16 : RVPBinaryIntrinsics; + defm urstas16 : RVPBinaryIntrinsics; + defm urstsa16 : RVPBinaryIntrinsics; + defm ursub8 : RVPBinaryIntrinsics; + defm ursub16 : RVPBinaryIntrinsics; + defm ursubw : RVPBinaryIntrinsics; + + class RVPBinaryABBIntrinsics + : Intrinsic<[llvm_any_ty], + [llvm_any_ty, LLVMMatchType<1>], + [IntrNoMem]>; + + multiclass RVPBinaryABBIntrinsics { + def "int_riscv_" # NAME : RVPBinaryABBIntrinsics; + } + + defm kdmbb : RVPBinaryABBIntrinsics; + defm kdmbt : RVPBinaryABBIntrinsics; + defm kdmtt : RVPBinaryABBIntrinsics; + defm khmbb : RVPBinaryABBIntrinsics; + defm khmbt : RVPBinaryABBIntrinsics; + defm khmtt : RVPBinaryABBIntrinsics; + defm kmda : RVPBinaryABBIntrinsics; + defm kmxda : RVPBinaryABBIntrinsics; + defm pbsad : RVPBinaryABBIntrinsics; + defm smbb16 : RVPBinaryABBIntrinsics; + defm smbt16 : RVPBinaryABBIntrinsics; + defm smtt16 : RVPBinaryABBIntrinsics; + defm smds : RVPBinaryABBIntrinsics; + defm smdrs : RVPBinaryABBIntrinsics; + defm smxds : RVPBinaryABBIntrinsics; + + class RVPBinaryAABIntrinsics + : Intrinsic<[llvm_any_ty], + [LLVMMatchType<0>, llvm_any_ty], + [IntrNoMem]>; + + multiclass RVPBinaryAABIntrinsics { + def "int_riscv_" # NAME : RVPBinaryAABIntrinsics; + } + + defm kmmwb2 : RVPBinaryAABIntrinsics; + defm kmmwb2_u : RVPBinaryAABIntrinsics; + defm kmmwt2 : RVPBinaryAABIntrinsics; + defm kmmwt2_u : RVPBinaryAABIntrinsics; + defm smmwb : RVPBinaryAABIntrinsics; + defm smmwb_u : RVPBinaryAABIntrinsics; + defm smmwt : RVPBinaryAABIntrinsics; + defm smmwt_u : RVPBinaryAABIntrinsics; + + defm ksll8 : RVPBinaryAABIntrinsics; + defm ksll16 : RVPBinaryAABIntrinsics; + defm kslra8 : RVPBinaryAABIntrinsics; + defm kslra8_u : RVPBinaryAABIntrinsics; + defm kslra16 : RVPBinaryAABIntrinsics; + defm kslra16_u : RVPBinaryAABIntrinsics; + defm sclip8 : RVPBinaryAABIntrinsics; + defm sclip16 : RVPBinaryAABIntrinsics; + defm sclip32 : RVPBinaryAABIntrinsics; + defm sll8 : RVPBinaryAABIntrinsics; + defm sll16 : RVPBinaryAABIntrinsics; + defm sra_u : RVPBinaryAABIntrinsics; + defm sra8 : RVPBinaryAABIntrinsics; + defm sra8_u : RVPBinaryAABIntrinsics; + defm sra16 : RVPBinaryAABIntrinsics; + defm sra16_u : RVPBinaryAABIntrinsics; + defm srl8 : RVPBinaryAABIntrinsics; + defm srl8_u : RVPBinaryAABIntrinsics; + defm srl16 : RVPBinaryAABIntrinsics; + defm srl16_u : RVPBinaryAABIntrinsics; + defm uclip8 : RVPBinaryAABIntrinsics; + defm uclip16 : RVPBinaryAABIntrinsics; + defm uclip32 : RVPBinaryAABIntrinsics; + + class RVPTernaryIntrinsics + : Intrinsic<[llvm_any_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem]>; + + multiclass RVPTernaryIntrinsics { + def "int_riscv_" # NAME : RVPTernaryIntrinsics; + } + + defm bpick : RVPTernaryIntrinsics; + defm insb : RVPTernaryIntrinsics; + + defm kmmac : RVPTernaryIntrinsics; + defm kmmac_u : RVPTernaryIntrinsics; + defm kmmsb : RVPTernaryIntrinsics; + defm kmmsb_u : RVPTernaryIntrinsics; + + class RVPTernaryAABBIntrinsics + : Intrinsic<[llvm_any_ty], + [LLVMMatchType<0>, llvm_any_ty, LLVMMatchType<1>], + [IntrNoMem]>; + + multiclass RVPTernaryAABBIntrinsics { + def 
"int_riscv_" # NAME : RVPTernaryAABBIntrinsics; + } + + defm kdmabb : RVPTernaryAABBIntrinsics; + defm kdmabt : RVPTernaryAABBIntrinsics; + defm kdmatt : RVPTernaryAABBIntrinsics; + defm kmabb : RVPTernaryAABBIntrinsics; + defm kmabt : RVPTernaryAABBIntrinsics; + defm kmatt : RVPTernaryAABBIntrinsics; + defm kmada : RVPTernaryAABBIntrinsics; + defm kmaxda : RVPTernaryAABBIntrinsics; + defm kmads : RVPTernaryAABBIntrinsics; + defm kmadrs : RVPTernaryAABBIntrinsics; + defm kmaxds : RVPTernaryAABBIntrinsics; + defm kmsda : RVPTernaryAABBIntrinsics; + defm kmsxda : RVPTernaryAABBIntrinsics; + defm pbsada : RVPTernaryAABBIntrinsics; + defm smaqa : RVPTernaryAABBIntrinsics; + defm smaqa_su : RVPTernaryAABBIntrinsics; + defm umaqa : RVPTernaryAABBIntrinsics; + + class RVPTernaryAAABIntrinsics + : Intrinsic<[llvm_any_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty], + [IntrNoMem]>; + + multiclass RVPTernaryAAABIntrinsics { + def "int_riscv_" # NAME : RVPTernaryAAABIntrinsics; + } + + defm kmmawb : RVPTernaryAAABIntrinsics; + defm kmmawb_u : RVPTernaryAAABIntrinsics; + defm kmmawb2 : RVPTernaryAAABIntrinsics; + defm kmmawb2_u : RVPTernaryAAABIntrinsics; + defm kmmawt : RVPTernaryAAABIntrinsics; + defm kmmawt_u : RVPTernaryAAABIntrinsics; + defm kmmawt2 : RVPTernaryAAABIntrinsics; + defm kmmawt2_u : RVPTernaryAAABIntrinsics; +} // TargetPrefix = "riscv" + //===----------------------------------------------------------------------===// // Vectors diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -168,6 +168,17 @@ } } + if (Subtarget.hasStdExtZpn()) { + if (Subtarget.is64Bit()) { + addRegisterClass(MVT::v8i8, &RISCV::GPRRegClass); + addRegisterClass(MVT::v4i16, &RISCV::GPRRegClass); + addRegisterClass(MVT::v2i32, &RISCV::GPRRegClass); + } else { + addRegisterClass(MVT::v4i8, &RISCV::GPRRegClass); + addRegisterClass(MVT::v2i16, &RISCV::GPRRegClass); + } + } + // Compute derived properties from the register classes. computeRegisterProperties(STI.getRegisterInfo()); @@ -749,6 +760,41 @@ } } + if (Subtarget.hasStdExtP()) { + const auto addTypeForP = [&](MVT VT, MVT PromotedBitwiseVT) { + // Expand all builtin opcodes. + for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) + setOperationAction(Opc, VT, Expand); + + setOperationAction(ISD::BITCAST, VT, Legal); + + // Promote load and store operations. + setOperationAction(ISD::LOAD, VT, Promote); + AddPromotedToType(ISD::LOAD, VT, PromotedBitwiseVT); + setOperationAction(ISD::STORE, VT, Promote); + AddPromotedToType(ISD::STORE, VT, PromotedBitwiseVT); + }; + + if (Subtarget.is64Bit()) { + addTypeForP(MVT::v8i8, MVT::i64); + addTypeForP(MVT::v4i16, MVT::i64); + addTypeForP(MVT::v2i32, MVT::i64); + } else { + addTypeForP(MVT::v4i8, MVT::i32); + addTypeForP(MVT::v2i16, MVT::i32); + } + + // Expand all truncating stores and extending loads. + for (MVT VT0 : MVT::vector_valuetypes()) { + for (MVT VT1 : MVT::vector_valuetypes()) { + setTruncStoreAction(VT0, VT1, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT0, VT1, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT0, VT1, Expand); + setLoadExtAction(ISD::EXTLOAD, VT0, VT1, Expand); + } + } + } + // Function alignments. const Align FunctionAlignment(Subtarget.hasStdExtC() ? 
2 : 4); setMinFunctionAlignment(FunctionAlignment); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td @@ -953,3 +953,545 @@ def : InstAlias<"rdov $rd", (CSRRS GPR:$rd, 0x009, X0)>; def : InstAlias<"clrov", (CSRRCI X0, 0x009, 1)>; } + +//===----------------------------------------------------------------------===// +// Intrinsics codegen patterns +//===----------------------------------------------------------------------===// + +class RVPBitconvertPat + : Pat<(DstVT (bitconvert (SrcVT SrcRC:$src))), + (COPY_TO_REGCLASS SrcRC:$src, DstRC)>; + +let Predicates = [HasStdExtP] in { +def : RVPBitconvertPat; +def : RVPBitconvertPat; +def : RVPBitconvertPat; +def : RVPBitconvertPat; +def : RVPBitconvertPat; +def : RVPBitconvertPat; + +def : RVPBitconvertPat; +def : RVPBitconvertPat; +def : RVPBitconvertPat; +def : RVPBitconvertPat; +def : RVPBitconvertPat; +def : RVPBitconvertPat; +} + +// Unary operation +class RVPUnaryScalarPat + : Pat<(XLenVT (!cast("int_riscv_" # IntID) XLenVT:$rs1)), + (Inst GPR:$rs1)>; + +let Predicates = [HasStdExtP] in +def : RVPUnaryScalarPat; + +class RVPUnaryVector8Pat + : Pat<(XVEI8VT (!cast("int_riscv_" # IntID) XVEI8VT:$rs1)), + (Inst GPR:$rs1)>; + +multiclass RVPUnary8Pat { + def : RVPUnaryScalarPat; + def : RVPUnaryVector8Pat; +} + +let Predicates = [HasStdExtP] in { +defm : RVPUnary8Pat; +defm : RVPUnary8Pat; +defm : RVPUnary8Pat; +defm : RVPUnary8Pat; +defm : RVPUnary8Pat; +} + +class RVPUnaryVector8PKDPat + : Pat<(XVEI16VT (!cast("int_riscv_" # IntID) XVEI8VT:$rs1)), + (Inst GPR:$rs1)>; + +multiclass RVPUnary8PKDPat { + def : RVPUnaryScalarPat; + def : RVPUnaryVector8PKDPat; +} + +let Predicates = [HasStdExtP] in { +defm : RVPUnary8PKDPat; +defm : RVPUnary8PKDPat; +defm : RVPUnary8PKDPat; +defm : RVPUnary8PKDPat; +defm : RVPUnary8PKDPat; +defm : RVPUnary8PKDPat; +defm : RVPUnary8PKDPat; +defm : RVPUnary8PKDPat; +defm : RVPUnary8PKDPat; +defm : RVPUnary8PKDPat; +} + +class RVPUnaryVector16Pat + : Pat<(XVEI16VT (!cast("int_riscv_" # IntID) XVEI16VT:$rs1)), + (Inst GPR:$rs1)>; + +multiclass RVPUnary16Pat { + def : RVPUnaryScalarPat; + def : RVPUnaryVector16Pat; +} + +let Predicates = [HasStdExtP] in { +defm : RVPUnary16Pat; +defm : RVPUnary16Pat; +defm : RVPUnary16Pat; +defm : RVPUnary16Pat; +defm : RVPUnary16Pat; +} + +class RVPUnaryVector32Pat + : Pat<(XVEI32VT (!cast("int_riscv_" # IntID) XVEI32VT:$rs1)), + (Inst GPR:$rs1)>; + +multiclass RVPUnary32Pat { + let Predicates = [HasStdExtP] in + def : RVPUnaryScalarPat; + let Predicates = [HasStdExtP, IsRV64] in + def : RVPUnaryVector32Pat; +} + +defm : RVPUnary32Pat; +defm : RVPUnary32Pat; +defm : RVPUnary32Pat; + +// Binary operation +class RVPBinaryScalarPat + : Pat<(XLenVT (!cast("int_riscv_" # IntID) + XLenVT:$rs1, XLenVT:$rs2)), + (Inst GPR:$rs1, GPR:$rs2)>; + +let Predicates = [HasStdExtP] in { +def : RVPBinaryScalarPat; +def : RVPBinaryScalarPat; +def : RVPBinaryScalarPat; +def : RVPBinaryScalarPat; +def : RVPBinaryScalarPat; +def : RVPBinaryScalarPat; +def : RVPBinaryScalarPat; +def : RVPBinaryScalarPat; +def : RVPBinaryScalarPat; +def : RVPBinaryScalarPat; +def : RVPBinaryScalarPat; +def : RVPBinaryScalarPat; +def : RVPBinaryScalarPat; +def : RVPBinaryScalarPat; +def : RVPBinaryScalarPat; +def : RVPBinaryScalarPat; +def : RVPBinaryScalarPat; +def : RVPBinaryScalarPat; +def : RVPBinaryScalarPat; +def : RVPBinaryScalarPat; +} + +class RVPBinaryVector8Pat + : Pat<(XVEI8VT 
(!cast("int_riscv_" # IntID) + XVEI8VT:$rs1, XVEI8VT:$rs2)), + (Inst GPR:$rs1, GPR:$rs2)>; + +multiclass RVPBinary8Pat { + def : RVPBinaryScalarPat; + def : RVPBinaryVector8Pat; +} + +let Predicates = [HasStdExtP] in { +defm : RVPBinary8Pat; +defm : RVPBinary8Pat; +defm : RVPBinary8Pat; +defm : RVPBinary8Pat; +defm : RVPBinary8Pat; +defm : RVPBinary8Pat; +defm : RVPBinary8Pat; +defm : RVPBinary8Pat; +defm : RVPBinary8Pat; +defm : RVPBinary8Pat; +defm : RVPBinary8Pat; +defm : RVPBinary8Pat; +defm : RVPBinary8Pat; +defm : RVPBinary8Pat; +defm : RVPBinary8Pat; +defm : RVPBinary8Pat; +defm : RVPBinary8Pat; +defm : RVPBinary8Pat; +defm : RVPBinary8Pat; +defm : RVPBinary8Pat; +defm : RVPBinary8Pat; +} + +class RVPBinaryVector8PBSADPat + : Pat<(XLenVT (!cast("int_riscv_" # IntID) + XVEI8VT:$rs1, XVEI8VT:$rs2)), + (Inst GPR:$rs1, GPR:$rs2)>; + +multiclass RVPBinary8PBSADPat { + def : RVPBinaryScalarPat; + def : RVPBinaryVector8PBSADPat; +} + +let Predicates = [HasStdExtP] in +defm : RVPBinary8PBSADPat; + +class RVPBinaryVector8ShiftPat + : Pat<(XVEI8VT (!cast("int_riscv_" # IntID) + XVEI8VT:$rs1, XLenVT:$rs2)), + (Inst GPR:$rs1, GPR:$rs2)>; + +multiclass RVPBinary8ShiftPat { + def : RVPBinaryScalarPat; + def : RVPBinaryVector8ShiftPat; +} + +let Predicates = [HasStdExtP] in { +defm : RVPBinary8ShiftPat; +defm : RVPBinary8ShiftPat; +defm : RVPBinary8ShiftPat; +defm : RVPBinary8ShiftPat; +defm : RVPBinary8ShiftPat; +defm : RVPBinary8ShiftPat; +defm : RVPBinary8ShiftPat; +defm : RVPBinary8ShiftPat; +} + +class RVPBinaryScalarI3Pat + : Pat<(XLenVT (!cast("int_riscv_" # IntID) + XLenVT:$rs1, uimm3:$rs2)), + (Inst GPR:$rs1, uimm3:$rs2)>; + +class RVPBinaryVector8ShiftI3Pat + : Pat<(XVEI8VT (!cast("int_riscv_" # IntID) + XVEI8VT:$rs1, uimm3:$rs2)), + (Inst GPR:$rs1, uimm3:$rs2)>; + +multiclass RVPBinary8ShiftI3Pat { + def : RVPBinaryScalarI3Pat; + def : RVPBinaryVector8ShiftI3Pat; +} + +let Predicates = [HasStdExtP] in { +defm : RVPBinary8ShiftI3Pat; +defm : RVPBinary8ShiftI3Pat; +} + +class RVPBinaryVector16Pat + : Pat<(XVEI16VT (!cast("int_riscv_" # IntID) + XVEI16VT:$rs1, XVEI16VT:$rs2)), + (Inst GPR:$rs1, GPR:$rs2)>; + +multiclass RVPBinary16Pat { + def : RVPBinaryScalarPat; + def : RVPBinaryVector16Pat; +} + +let Predicates = [HasStdExtP] in { +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +defm : RVPBinary16Pat; +} + +class RVPBinaryVector16KMPat + : Pat<(XLenVT (!cast("int_riscv_" # IntID) + XVEI16VT:$rs1, XVEI16VT:$rs2)), + (Inst GPR:$rs1, GPR:$rs2)>; + +multiclass RVPBinary16KMPat { + def : 
RVPBinaryScalarPat; + def : RVPBinaryVector16KMPat; +} + +let Predicates = [HasStdExtP] in { +defm : RVPBinary16KMPat; +defm : RVPBinary16KMPat; +defm : RVPBinary16KMPat; +defm : RVPBinary16KMPat; +defm : RVPBinary16KMPat; +defm : RVPBinary16KMPat; +} + +class RVPBinaryVector16ShiftPat + : Pat<(XVEI16VT (!cast("int_riscv_" # IntID) + XVEI16VT:$rs1, XLenVT:$rs2)), + (Inst GPR:$rs1, GPR:$rs2)>; + +multiclass RVPBinary16ShiftPat { + def : RVPBinaryScalarPat; + def : RVPBinaryVector16ShiftPat; +} + +let Predicates = [HasStdExtP] in { +defm : RVPBinary16ShiftPat; +defm : RVPBinary16ShiftPat; +defm : RVPBinary16ShiftPat; +defm : RVPBinary16ShiftPat; +defm : RVPBinary16ShiftPat; +defm : RVPBinary16ShiftPat; +defm : RVPBinary16ShiftPat; +defm : RVPBinary16ShiftPat; +} + +class RVPBinaryScalarI4Pat + : Pat<(XLenVT (!cast("int_riscv_" # IntID) + XLenVT:$rs1, uimm4:$rs2)), + (Inst GPR:$rs1, uimm4:$rs2)>; + +class RVPBinaryVector16ShiftI4Pat + : Pat<(XVEI16VT (!cast("int_riscv_" # IntID) + XVEI16VT:$rs1, uimm4:$rs2)), + (Inst GPR:$rs1, uimm4:$rs2)>; + +multiclass RVPBinary16ShiftI4Pat { + def : RVPBinaryScalarI4Pat; + def : RVPBinaryVector16ShiftI4Pat; +} + +let Predicates = [HasStdExtP] in { +defm : RVPBinary16ShiftI4Pat; +defm : RVPBinary16ShiftI4Pat; +} + +class RVPBinaryVector32Pat + : Pat<(XVEI32VT (!cast("int_riscv_" # IntID) + XVEI32VT:$rs1, XVEI32VT:$rs2)), + (Inst GPR:$rs1, GPR:$rs2)>; + +multiclass RVPBinary32Pat { + let Predicates = [HasStdExtP] in + def : RVPBinaryScalarPat; + let Predicates = [HasStdExtP, IsRV64] in + def : RVPBinaryVector32Pat; +} + +defm : RVPBinary32Pat; +defm : RVPBinary32Pat; +defm : RVPBinary32Pat; +defm : RVPBinary32Pat; + +class RVPBinaryVector32MPat + : Pat<(XVEI32VT (!cast("int_riscv_" # IntID) + XVEI16VT:$rs1, XVEI16VT:$rs2)), + (Inst GPR:$rs1, GPR:$rs2)>; + +multiclass RVPBinary32MPat { + def : RVPBinaryScalarPat; + def : RVPBinaryVector32MPat; +} + +let Predicates = [HasStdExtP] in { +defm : RVPBinary32MPat; +defm : RVPBinary32MPat; +defm : RVPBinary32MPat; +defm : RVPBinary32MPat; +defm : RVPBinary32MPat; +defm : RVPBinary32MPat; +defm : RVPBinary32MPat; +defm : RVPBinary32MPat; +} + +class RVPBinaryVector32MMWPat + : Pat<(XVEI32VT (!cast("int_riscv_" # IntID) + XVEI32VT:$rs1, XVEI16VT:$rs2)), + (Inst GPR:$rs1, GPR:$rs2)>; + +multiclass RVPBinary32MMWPat { + def : RVPBinaryScalarPat; + def : RVPBinaryVector32MMWPat; +} + +let Predicates = [HasStdExtP] in { +defm : RVPBinary32MMWPat; +defm : RVPBinary32MMWPat; +defm : RVPBinary32MMWPat; +defm : RVPBinary32MMWPat; +defm : RVPBinary32MMWPat; +defm : RVPBinary32MMWPat; +defm : RVPBinary32MMWPat; +defm : RVPBinary32MMWPat; +} + +class RVPBinaryScalarI5Pat + : Pat<(XLenVT (!cast("int_riscv_" # IntID) + XLenVT:$rs1, uimm5:$rs2)), + (Inst GPR:$rs1, uimm5:$rs2)>; + +class RVPBinaryVector32ShiftI5Pat + : Pat<(XVEI32VT (!cast("int_riscv_" # IntID) + XVEI32VT:$rs1, XLenVT:$rs2)), + (Inst GPR:$rs1, GPR:$rs2)>; + +multiclass RVPBinary32ShiftI5Pat { + let Predicates = [HasStdExtP] in + def : RVPBinaryScalarI5Pat; + let Predicates = [HasStdExtP, IsRV64] in + def : RVPBinaryVector32ShiftI5Pat; +} + +defm : RVPBinary32ShiftI5Pat; +defm : RVPBinary32ShiftI5Pat; + +// Ternary operation +class RVPTernaryScalarPat + : Pat<(XLenVT (!cast("int_riscv_" # IntID) + XLenVT:$rs1, XLenVT:$rs2, XLenVT:$rs3)), + (Inst GPR:$rs1, GPR:$rs2, GPR:$rs3)>; + +let Predicates = [HasStdExtP] in +def : RVPTernaryScalarPat; + +class RVPTernaryINSBPat + : Pat<(XLenVT (!cast("int_riscv_" # IntID) + XLenVT:$rs1, XLenVT:$rs2, uimmlog2xlenbytes:$rs3)), + 
(Inst GPR:$rs1, GPR:$rs2, uimmlog2xlenbytes:$rs3)>;
+
+let Predicates = [HasStdExtP] in
+def : RVPTernaryINSBPat;
+
+class RVPTernaryVector8PBSADAPat
+ : Pat<(XLenVT (!cast("int_riscv_" # IntID)
+ XLenVT:$rs1, XVEI8VT:$rs2, XVEI8VT:$rs3)),
+ (Inst GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+
+multiclass RVPTernary8PBSADAPat {
+ def : RVPTernaryScalarPat;
+ def : RVPTernaryVector8PBSADAPat;
+}
+
+let Predicates = [HasStdExtP] in
+defm : RVPTernary8PBSADAPat;
+
+class RVPTernaryVector8MAQAPat
+ : Pat<(XVEI32VT (!cast("int_riscv_" # IntID)
+ XVEI32VT:$rs1, XVEI8VT:$rs2, XVEI8VT:$rs3)),
+ (Inst GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+
+multiclass RVPTernary8MAQAPat {
+ def : RVPTernaryScalarPat;
+ def : RVPTernaryVector8MAQAPat;
+}
+
+defm : RVPTernary8MAQAPat;
+defm : RVPTernary8MAQAPat;
+defm : RVPTernary8MAQAPat;
+
+class RVPTernaryVector16KMPat
+ : Pat<(XLenVT (!cast("int_riscv_" # IntID)
+ XLenVT:$rs1, XVEI16VT:$rs2, XVEI16VT:$rs3)),
+ (Inst GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+
+multiclass RVPTernary16KMPat {
+ def : RVPTernaryScalarPat;
+ def : RVPTernaryVector16KMPat;
+}
+
+let Predicates = [HasStdExtP] in {
+defm : RVPTernary16KMPat;
+defm : RVPTernary16KMPat;
+defm : RVPTernary16KMPat;
+defm : RVPTernary16KMPat;
+defm : RVPTernary16KMPat;
+defm : RVPTernary16KMPat;
+}
+
+class RVPTernaryVector32Pat
+ : Pat<(XVEI32VT (!cast("int_riscv_" # IntID)
+ XVEI32VT:$rs1, XVEI32VT:$rs2, XVEI32VT:$rs3)),
+ (Inst GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+
+multiclass RVPTernary32Pat {
+ let Predicates = [HasStdExtP] in
+ def : RVPTernaryScalarPat;
+ let Predicates = [HasStdExtP, IsRV64] in
+ def : RVPTernaryVector32Pat;
+}
+
+defm : RVPTernary32Pat;
+defm : RVPTernary32Pat;
+defm : RVPTernary32Pat;
+defm : RVPTernary32Pat;
+
+class RVPTernaryVector32KMPat
+ : Pat<(XVEI32VT (!cast("int_riscv_" # IntID)
+ XVEI32VT:$rs1, XVEI16VT:$rs2, XVEI16VT:$rs3)),
+ (Inst GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+
+multiclass RVPTernary32KMPat {
+ def : RVPTernaryScalarPat;
+ def : RVPTernaryVector32KMPat;
+}
+
+let Predicates = [HasStdExtP] in {
+defm : RVPTernary32KMPat;
+defm : RVPTernary32KMPat;
+defm : RVPTernary32KMPat;
+defm : RVPTernary32KMPat;
+defm : RVPTernary32KMPat;
+defm : RVPTernary32KMPat;
+defm : RVPTernary32KMPat;
+defm : RVPTernary32KMPat;
+defm : RVPTernary32KMPat;
+defm : RVPTernary32KMPat;
+}
+
+class RVPTernaryVector32KMMPat
+ : Pat<(XVEI32VT (!cast("int_riscv_" # IntID)
+ XVEI32VT:$rs1, XVEI32VT:$rs2, XVEI16VT:$rs3)),
+ (Inst GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+
+multiclass RVPTernary32KMMPat {
+ def : RVPTernaryScalarPat;
+ def : RVPTernaryVector32KMMPat;
+}
+
+let Predicates = [HasStdExtP] in {
+defm : RVPTernary32KMMPat;
+defm : RVPTernary32KMMPat;
+defm : RVPTernary32KMMPat;
+defm : RVPTernary32KMMPat;
+defm : RVPTernary32KMMPat;
+defm : RVPTernary32KMMPat;
+defm : RVPTernary32KMMPat;
+defm : RVPTernary32KMMPat;
+}
diff --git a/llvm/test/CodeGen/RISCV/rvp/intrinsics-rv32p.ll b/llvm/test/CodeGen/RISCV/rvp/intrinsics-rv32p.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvp/intrinsics-rv32p.ll
@@ -0,0 +1,4611 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-p -verify-machineinstrs < %s \
+; RUN: | FileCheck %s
+
+define i32 @add8(i32 %a, i32 %b) {
+; CHECK-LABEL: add8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: add8 a0, a0, a1
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.riscv.add8.i32(i32 %a, i32 %b)
+ ret i32 %0
+}
+
+declare i32 @llvm.riscv.add8.i32(i32, i32)
+
+define i32 @v_uadd8(i32 %a.coerce, i32 %b.coerce) {
+; CHECK-LABEL: v_uadd8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: add8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = bitcast i32 %b.coerce to <4 x i8> + %2 = tail call <4 x i8> @llvm.riscv.add8.v4i8(<4 x i8> %0, <4 x i8> %1) + %3 = bitcast <4 x i8> %2 to i32 + ret i32 %3 +} + +declare <4 x i8> @llvm.riscv.add8.v4i8(<4 x i8>, <4 x i8>) + +define i32 @v_sadd8(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_sadd8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: add8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = bitcast i32 %b.coerce to <4 x i8> + %2 = tail call <4 x i8> @llvm.riscv.add8.v4i8(<4 x i8> %0, <4 x i8> %1) + %3 = bitcast <4 x i8> %2 to i32 + ret i32 %3 +} + +define i32 @add16(i32 %a, i32 %b) { +; CHECK-LABEL: add16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: add16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.add16.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.add16.i32(i32, i32) + +define i32 @v_uadd16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_uadd16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: add16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.add16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +declare <2 x i16> @llvm.riscv.add16.v2i16(<2 x i16>, <2 x i16>) + +define i32 @v_sadd16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_sadd16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: add16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.add16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +define i32 @ave(i32 %a, i32 %b) { +; CHECK-LABEL: ave: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ave a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.ave.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.ave.i32(i32, i32) + +define i32 @bitrev(i32 %a, i32 %b) { +; CHECK-LABEL: bitrev: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: bitrev a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.bitrev.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.bitrev.i32(i32, i32) + +define i32 @bpick(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: bpick: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: bpick a0, a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.bpick.i32(i32 %a, i32 %b, i32 %c) + ret i32 %0 +} + +declare i32 @llvm.riscv.bpick.i32(i32, i32, i32) + +define i32 @clrs8(i32 %a) { +; CHECK-LABEL: clrs8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clrs8 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.clrs8.i32(i32 %a) + ret i32 %0 +} + +declare i32 @llvm.riscv.clrs8.i32(i32) + +define i32 @v_clrs8(i32 %a.coerce) { +; CHECK-LABEL: v_clrs8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clrs8 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = tail call <4 x i8> @llvm.riscv.clrs8.v4i8(<4 x i8> %0) + %2 = bitcast <4 x i8> %1 to i32 + ret i32 %2 +} + +declare <4 x i8> @llvm.riscv.clrs8.v4i8(<4 x i8>) + +define i32 @clrs16(i32 %a) { +; CHECK-LABEL: clrs16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clrs16 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.clrs16.i32(i32 %a) + ret i32 %0 +} + +declare i32 @llvm.riscv.clrs16.i32(i32) + +define i32 
@v_clrs16(i32 %a.coerce) { +; CHECK-LABEL: v_clrs16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clrs16 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = tail call <2 x i16> @llvm.riscv.clrs16.v2i16(<2 x i16> %0) + %2 = bitcast <2 x i16> %1 to i32 + ret i32 %2 +} + +declare <2 x i16> @llvm.riscv.clrs16.v2i16(<2 x i16>) + +define i32 @clrs32(i32 %a) { +; CHECK-LABEL: clrs32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clrs32 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.clrs32.i32(i32 %a) + ret i32 %0 +} + +declare i32 @llvm.riscv.clrs32.i32(i32) + +define i32 @clo8(i32 %a) { +; CHECK-LABEL: clo8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clo8 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.clo8.i32(i32 %a) + ret i32 %0 +} + +declare i32 @llvm.riscv.clo8.i32(i32) + +define i32 @v_clo8(i32 %a.coerce) { +; CHECK-LABEL: v_clo8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clo8 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = tail call <4 x i8> @llvm.riscv.clo8.v4i8(<4 x i8> %0) + %2 = bitcast <4 x i8> %1 to i32 + ret i32 %2 +} + +declare <4 x i8> @llvm.riscv.clo8.v4i8(<4 x i8>) + +define i32 @clo16(i32 %a) { +; CHECK-LABEL: clo16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clo16 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.clo16.i32(i32 %a) + ret i32 %0 +} + +declare i32 @llvm.riscv.clo16.i32(i32) + +define i32 @v_clo16(i32 %a.coerce) { +; CHECK-LABEL: v_clo16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clo16 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = tail call <2 x i16> @llvm.riscv.clo16.v2i16(<2 x i16> %0) + %2 = bitcast <2 x i16> %1 to i32 + ret i32 %2 +} + +declare <2 x i16> @llvm.riscv.clo16.v2i16(<2 x i16>) + +define i32 @clo32(i32 %a) { +; CHECK-LABEL: clo32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clo32 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.clo32.i32(i32 %a) + ret i32 %0 +} + +declare i32 @llvm.riscv.clo32.i32(i32) + +define i32 @clz8(i32 %a) { +; CHECK-LABEL: clz8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clz8 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.clz8.i32(i32 %a) + ret i32 %0 +} + +declare i32 @llvm.riscv.clz8.i32(i32) + +define i32 @v_clz8(i32 %a.coerce) { +; CHECK-LABEL: v_clz8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clz8 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = tail call <4 x i8> @llvm.riscv.clz8.v4i8(<4 x i8> %0) + %2 = bitcast <4 x i8> %1 to i32 + ret i32 %2 +} + +declare <4 x i8> @llvm.riscv.clz8.v4i8(<4 x i8>) + +define i32 @clz16(i32 %a) { +; CHECK-LABEL: clz16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clz16 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.clz16.i32(i32 %a) + ret i32 %0 +} + +declare i32 @llvm.riscv.clz16.i32(i32) + +define i32 @v_clz16(i32 %a.coerce) { +; CHECK-LABEL: v_clz16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clz16 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = tail call <2 x i16> @llvm.riscv.clz16.v2i16(<2 x i16> %0) + %2 = bitcast <2 x i16> %1 to i32 + ret i32 %2 +} + +declare <2 x i16> @llvm.riscv.clz16.v2i16(<2 x i16>) + +define i32 @clz32(i32 %a) { +; CHECK-LABEL: clz32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clz32 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.clz32.i32(i32 %a) + ret i32 %0 +} + +declare i32 @llvm.riscv.clz32.i32(i32) + +define i32 @cmpeq8(i32 %a, i32 %b) { +; CHECK-LABEL: 
cmpeq8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpeq8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.cmpeq8.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.cmpeq8.i32(i32, i32) + +define i32 @v_scmpeq8(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_scmpeq8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpeq8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = bitcast i32 %b.coerce to <4 x i8> + %2 = tail call <4 x i8> @llvm.riscv.cmpeq8.v4i8(<4 x i8> %0, <4 x i8> %1) + %3 = bitcast <4 x i8> %2 to i32 + ret i32 %3 +} + +declare <4 x i8> @llvm.riscv.cmpeq8.v4i8(<4 x i8>, <4 x i8>) + +define i32 @v_ucmpeq8(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_ucmpeq8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpeq8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = bitcast i32 %b.coerce to <4 x i8> + %2 = tail call <4 x i8> @llvm.riscv.cmpeq8.v4i8(<4 x i8> %0, <4 x i8> %1) + %3 = bitcast <4 x i8> %2 to i32 + ret i32 %3 +} + +define i32 @cmpeq16(i32 %a, i32 %b) { +; CHECK-LABEL: cmpeq16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpeq16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.cmpeq16.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.cmpeq16.i32(i32, i32) + +define i32 @v_scmpeq16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_scmpeq16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpeq16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.cmpeq16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +declare <2 x i16> @llvm.riscv.cmpeq16.v2i16(<2 x i16>, <2 x i16>) + +define i32 @v_ucmpeq16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_ucmpeq16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpeq16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.cmpeq16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +define i32 @cras16(i32 %a, i32 %b) { +; CHECK-LABEL: cras16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cras16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.cras16.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.cras16.i32(i32, i32) + +define i32 @v_ucras16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_ucras16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cras16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.cras16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +declare <2 x i16> @llvm.riscv.cras16.v2i16(<2 x i16>, <2 x i16>) + +define i32 @v_scras16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_scras16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cras16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.cras16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +define i32 @crsa16(i32 %a, i32 %b) { +; CHECK-LABEL: crsa16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: crsa16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.crsa16.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 
@llvm.riscv.crsa16.i32(i32, i32) + +define i32 @v_ucrsa16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_ucrsa16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: crsa16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.crsa16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +declare <2 x i16> @llvm.riscv.crsa16.v2i16(<2 x i16>, <2 x i16>) + +define i32 @v_scrsa16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_scrsa16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: crsa16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.crsa16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +define i32 @insb(i32 %a, i32 %b) { +; CHECK-LABEL: insb: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: insb a0, a1, 3 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.insb.i32(i32 %a, i32 %b, i32 3) + ret i32 %0 +} + +declare i32 @llvm.riscv.insb.i32(i32, i32, i32) + +define i32 @kabs8(i32 %a) { +; CHECK-LABEL: kabs8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kabs8 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kabs8.i32(i32 %a) + ret i32 %0 +} + +declare i32 @llvm.riscv.kabs8.i32(i32) + +define i32 @v_kabs8(i32 %a.coerce) { +; CHECK-LABEL: v_kabs8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kabs8 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = tail call <4 x i8> @llvm.riscv.kabs8.v4i8(<4 x i8> %0) + %2 = bitcast <4 x i8> %1 to i32 + ret i32 %2 +} + +declare <4 x i8> @llvm.riscv.kabs8.v4i8(<4 x i8>) + +define i32 @kabs16(i32 %a) { +; CHECK-LABEL: kabs16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kabs16 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kabs16.i32(i32 %a) + ret i32 %0 +} + +declare i32 @llvm.riscv.kabs16.i32(i32) + +define i32 @v_kabs16(i32 %a.coerce) { +; CHECK-LABEL: v_kabs16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kabs16 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = tail call <2 x i16> @llvm.riscv.kabs16.v2i16(<2 x i16> %0) + %2 = bitcast <2 x i16> %1 to i32 + ret i32 %2 +} + +declare <2 x i16> @llvm.riscv.kabs16.v2i16(<2 x i16>) + +define i32 @kabsw(i32 %a) { +; CHECK-LABEL: kabsw: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kabsw a0, a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kabsw.i32(i32 %a) + ret i32 %0 +} + +declare i32 @llvm.riscv.kabsw.i32(i32) + +define i32 @kadd8(i32 %a, i32 %b) { +; CHECK-LABEL: kadd8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kadd8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kadd8.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.kadd8.i32(i32, i32) + +define i32 @v_kadd8(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_kadd8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kadd8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = bitcast i32 %b.coerce to <4 x i8> + %2 = tail call <4 x i8> @llvm.riscv.kadd8.v4i8(<4 x i8> %0, <4 x i8> %1) + %3 = bitcast <4 x i8> %2 to i32 + ret i32 %3 +} + +declare <4 x i8> @llvm.riscv.kadd8.v4i8(<4 x i8>, <4 x i8>) + +define i32 @kadd16(i32 %a, i32 %b) { +; CHECK-LABEL: kadd16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kadd16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kadd16.i32(i32 %a, i32 %b) + ret i32 %0 +} + 
+declare i32 @llvm.riscv.kadd16.i32(i32, i32) + +define i32 @v_kadd16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_kadd16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kadd16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.kadd16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +declare <2 x i16> @llvm.riscv.kadd16.v2i16(<2 x i16>, <2 x i16>) + +define i32 @kaddh(i32 %a, i32 %b) { +; CHECK-LABEL: kaddh: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kaddh a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kaddh.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.kaddh.i32(i32, i32) + +define i32 @kaddw(i32 %a, i32 %b) { +; CHECK-LABEL: kaddw: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kaddw a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kaddw.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.kaddw.i32(i32, i32) + +define i32 @kcras16(i32 %a, i32 %b) { +; CHECK-LABEL: kcras16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kcras16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kcras16.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.kcras16.i32(i32, i32) + +define i32 @v_kcras16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_kcras16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kcras16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.kcras16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +declare <2 x i16> @llvm.riscv.kcras16.v2i16(<2 x i16>, <2 x i16>) + +define i32 @kcrsa16(i32 %a, i32 %b) { +; CHECK-LABEL: kcrsa16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kcrsa16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kcrsa16.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.kcrsa16.i32(i32, i32) + +define i32 @v_kcrsa16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_kcrsa16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kcrsa16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.kcrsa16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +declare <2 x i16> @llvm.riscv.kcrsa16.v2i16(<2 x i16>, <2 x i16>) + +define i32 @kdmbb(i32 %a, i32 %b) { +; CHECK-LABEL: kdmbb: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kdmbb a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kdmbb.i32.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.kdmbb.i32.i32(i32, i32) + +define i32 @v_kdmbb(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_kdmbb: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kdmbb a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call i32 @llvm.riscv.kdmbb.i32.v2i16(<2 x i16> %0, <2 x i16> %1) + ret i32 %2 +} + +declare i32 @llvm.riscv.kdmbb.i32.v2i16(<2 x i16>, <2 x i16>) + +define i32 @kdmbt(i32 %a, i32 %b) { +; CHECK-LABEL: kdmbt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kdmbt a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kdmbt.i32.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.kdmbt.i32.i32(i32, i32) + +define i32 @v_kdmbt(i32 %a.coerce, i32 %b.coerce) { +; 
CHECK-LABEL: v_kdmbt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kdmbt a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call i32 @llvm.riscv.kdmbt.i32.v2i16(<2 x i16> %0, <2 x i16> %1) + ret i32 %2 +} + +declare i32 @llvm.riscv.kdmbt.i32.v2i16(<2 x i16>, <2 x i16>) + +define i32 @kdmtt(i32 %a, i32 %b) { +; CHECK-LABEL: kdmtt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kdmtt a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kdmtt.i32.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.kdmtt.i32.i32(i32, i32) + +define i32 @v_kdmtt(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_kdmtt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kdmtt a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call i32 @llvm.riscv.kdmtt.i32.v2i16(<2 x i16> %0, <2 x i16> %1) + ret i32 %2 +} + +declare i32 @llvm.riscv.kdmtt.i32.v2i16(<2 x i16>, <2 x i16>) + +define i32 @kdmabb(i32 %t, i32 %a, i32 %b) { +; CHECK-LABEL: kdmabb: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kdmabb a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kdmabb.i32.i32(i32 %t, i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.kdmabb.i32.i32(i32, i32, i32) + +define i32 @v_kdmabb(i32 %t, i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_kdmabb: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kdmabb a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call i32 @llvm.riscv.kdmabb.i32.v2i16(i32 %t, <2 x i16> %0, <2 x i16> %1) + ret i32 %2 +} + +declare i32 @llvm.riscv.kdmabb.i32.v2i16(i32, <2 x i16>, <2 x i16>) + +define i32 @kdmabt(i32 %t, i32 %a, i32 %b) { +; CHECK-LABEL: kdmabt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kdmabt a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kdmabt.i32.i32(i32 %t, i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.kdmabt.i32.i32(i32, i32, i32) + +define i32 @v_kdmabt(i32 %t, i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_kdmabt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kdmabt a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call i32 @llvm.riscv.kdmabt.i32.v2i16(i32 %t, <2 x i16> %0, <2 x i16> %1) + ret i32 %2 +} + +declare i32 @llvm.riscv.kdmabt.i32.v2i16(i32, <2 x i16>, <2 x i16>) + +define i32 @kdmatt(i32 %t, i32 %a, i32 %b) { +; CHECK-LABEL: kdmatt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kdmatt a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kdmatt.i32.i32(i32 %t, i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.kdmatt.i32.i32(i32, i32, i32) + +define i32 @v_kdmatt(i32 %t, i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_kdmatt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kdmatt a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call i32 @llvm.riscv.kdmatt.i32.v2i16(i32 %t, <2 x i16> %0, <2 x i16> %1) + ret i32 %2 +} + +declare i32 @llvm.riscv.kdmatt.i32.v2i16(i32, <2 x i16>, <2 x i16>) + +define i32 @khm8(i32 %a, i32 %b) { +; CHECK-LABEL: khm8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: khm8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.khm8.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.khm8.i32(i32, i32) + +define i32 @v_khm8(i32 
%a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_khm8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: khm8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = bitcast i32 %b.coerce to <4 x i8> + %2 = tail call <4 x i8> @llvm.riscv.khm8.v4i8(<4 x i8> %0, <4 x i8> %1) + %3 = bitcast <4 x i8> %2 to i32 + ret i32 %3 +} + +declare <4 x i8> @llvm.riscv.khm8.v4i8(<4 x i8>, <4 x i8>) + +define i32 @khmx8(i32 %a, i32 %b) { +; CHECK-LABEL: khmx8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: khmx8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.khmx8.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.khmx8.i32(i32, i32) + +define i32 @v_khmx8(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_khmx8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: khmx8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = bitcast i32 %b.coerce to <4 x i8> + %2 = tail call <4 x i8> @llvm.riscv.khmx8.v4i8(<4 x i8> %0, <4 x i8> %1) + %3 = bitcast <4 x i8> %2 to i32 + ret i32 %3 +} + +declare <4 x i8> @llvm.riscv.khmx8.v4i8(<4 x i8>, <4 x i8>) + +define i32 @khm16(i32 %a, i32 %b) { +; CHECK-LABEL: khm16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: khm16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.khm16.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.khm16.i32(i32, i32) + +define i32 @v_khm16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_khm16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: khm16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.khm16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +declare <2 x i16> @llvm.riscv.khm16.v2i16(<2 x i16>, <2 x i16>) + +define i32 @khmx16(i32 %a, i32 %b) { +; CHECK-LABEL: khmx16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: khmx16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.khmx16.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.khmx16.i32(i32, i32) + +define i32 @v_khmx16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_khmx16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: khmx16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.khmx16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +declare <2 x i16> @llvm.riscv.khmx16.v2i16(<2 x i16>, <2 x i16>) + +define i32 @khmbb(i32 %a, i32 %b) { +; CHECK-LABEL: khmbb: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: khmbb a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.khmbb.i32.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.khmbb.i32.i32(i32, i32) + +define i32 @v_khmbb(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_khmbb: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: khmbb a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call i32 @llvm.riscv.khmbb.i32.v2i16(<2 x i16> %0, <2 x i16> %1) + ret i32 %2 +} + +declare i32 @llvm.riscv.khmbb.i32.v2i16(<2 x i16>, <2 x i16>) + +define i32 @khmbt(i32 %a, i32 %b) { +; CHECK-LABEL: khmbt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: khmbt a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.khmbt.i32.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.khmbt.i32.i32(i32, i32) + +define i32 
@v_khmbt(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_khmbt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: khmbt a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call i32 @llvm.riscv.khmbt.i32.v2i16(<2 x i16> %0, <2 x i16> %1) + ret i32 %2 +} + +declare i32 @llvm.riscv.khmbt.i32.v2i16(<2 x i16>, <2 x i16>) + +define i32 @khmtt(i32 %a, i32 %b) { +; CHECK-LABEL: khmtt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: khmtt a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.khmtt.i32.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.khmtt.i32.i32(i32, i32) + +define i32 @v_khmtt(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_khmtt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: khmtt a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call i32 @llvm.riscv.khmtt.i32.v2i16(<2 x i16> %0, <2 x i16> %1) + ret i32 %2 +} + +declare i32 @llvm.riscv.khmtt.i32.v2i16(<2 x i16>, <2 x i16>) + +define i32 @kmabb(i32 %t, i32 %a, i32 %b) { +; CHECK-LABEL: kmabb: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmabb a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kmabb.i32.i32(i32 %t, i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.kmabb.i32.i32(i32, i32, i32) + +define i32 @v_kmabb(i32 %t, i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_kmabb: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmabb a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call i32 @llvm.riscv.kmabb.i32.v2i16(i32 %t, <2 x i16> %0, <2 x i16> %1) + ret i32 %2 +} + +declare i32 @llvm.riscv.kmabb.i32.v2i16(i32, <2 x i16>, <2 x i16>) + +define i32 @kmabt(i32 %t, i32 %a, i32 %b) { +; CHECK-LABEL: kmabt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmabt a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kmabt.i32.i32(i32 %t, i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.kmabt.i32.i32(i32, i32, i32) + +define i32 @v_kmabt(i32 %t, i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_kmabt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmabt a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call i32 @llvm.riscv.kmabt.i32.v2i16(i32 %t, <2 x i16> %0, <2 x i16> %1) + ret i32 %2 +} + +declare i32 @llvm.riscv.kmabt.i32.v2i16(i32, <2 x i16>, <2 x i16>) + +define i32 @kmatt(i32 %t, i32 %a, i32 %b) { +; CHECK-LABEL: kmatt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmatt a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kmatt.i32.i32(i32 %t, i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.kmatt.i32.i32(i32, i32, i32) + +define i32 @v_kmatt(i32 %t, i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_kmatt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmatt a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call i32 @llvm.riscv.kmatt.i32.v2i16(i32 %t, <2 x i16> %0, <2 x i16> %1) + ret i32 %2 +} + +declare i32 @llvm.riscv.kmatt.i32.v2i16(i32, <2 x i16>, <2 x i16>) + +define i32 @kmada(i32 %t, i32 %a, i32 %b) { +; CHECK-LABEL: kmada: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmada a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kmada.i32.i32(i32 %t, i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 
@llvm.riscv.kmada.i32.i32(i32, i32, i32) + +define i32 @v_kmada(i32 %t, i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_kmada: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmada a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call i32 @llvm.riscv.kmada.i32.v2i16(i32 %t, <2 x i16> %0, <2 x i16> %1) + ret i32 %2 +} + +declare i32 @llvm.riscv.kmada.i32.v2i16(i32, <2 x i16>, <2 x i16>) + +define i32 @kmaxda(i32 %t, i32 %a, i32 %b) { +; CHECK-LABEL: kmaxda: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmaxda a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kmaxda.i32.i32(i32 %t, i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.kmaxda.i32.i32(i32, i32, i32) + +define i32 @v_kmaxda(i32 %t, i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_kmaxda: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmaxda a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call i32 @llvm.riscv.kmaxda.i32.v2i16(i32 %t, <2 x i16> %0, <2 x i16> %1) + ret i32 %2 +} + +declare i32 @llvm.riscv.kmaxda.i32.v2i16(i32, <2 x i16>, <2 x i16>) + +define i32 @kmads(i32 %t, i32 %a, i32 %b) { +; CHECK-LABEL: kmads: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmads a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kmads.i32.i32(i32 %t, i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.kmads.i32.i32(i32, i32, i32) + +define i32 @v_kmads(i32 %t, i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_kmads: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmads a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call i32 @llvm.riscv.kmads.i32.v2i16(i32 %t, <2 x i16> %0, <2 x i16> %1) + ret i32 %2 +} + +declare i32 @llvm.riscv.kmads.i32.v2i16(i32, <2 x i16>, <2 x i16>) + +define i32 @kmadrs(i32 %t, i32 %a, i32 %b) { +; CHECK-LABEL: kmadrs: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmadrs a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kmadrs.i32.i32(i32 %t, i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.kmadrs.i32.i32(i32, i32, i32) + +define i32 @v_kmadrs(i32 %t, i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_kmadrs: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmadrs a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call i32 @llvm.riscv.kmadrs.i32.v2i16(i32 %t, <2 x i16> %0, <2 x i16> %1) + ret i32 %2 +} + +declare i32 @llvm.riscv.kmadrs.i32.v2i16(i32, <2 x i16>, <2 x i16>) + +define i32 @kmaxds(i32 %t, i32 %a, i32 %b) { +; CHECK-LABEL: kmaxds: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmaxds a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kmaxds.i32.i32(i32 %t, i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.kmaxds.i32.i32(i32, i32, i32) + +define i32 @v_kmaxds(i32 %t, i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_kmaxds: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmaxds a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call i32 @llvm.riscv.kmaxds.i32.v2i16(i32 %t, <2 x i16> %0, <2 x i16> %1) + ret i32 %2 +} + +declare i32 @llvm.riscv.kmaxds.i32.v2i16(i32, <2 x i16>, <2 x i16>) + +define i32 @kmda(i32 %a, i32 %b) { +; CHECK-LABEL: kmda: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmda a0, a0, a1 +; CHECK-NEXT: 
ret +entry: + %0 = tail call i32 @llvm.riscv.kmda.i32.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.kmda.i32.i32(i32, i32) + +define i32 @v_kmda(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_kmda: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmda a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call i32 @llvm.riscv.kmda.i32.v2i16(<2 x i16> %0, <2 x i16> %1) + ret i32 %2 +} + +declare i32 @llvm.riscv.kmda.i32.v2i16(<2 x i16>, <2 x i16>) + +define i32 @kmxda(i32 %a, i32 %b) { +; CHECK-LABEL: kmxda: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmxda a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kmxda.i32.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.kmxda.i32.i32(i32, i32) + +define i32 @v_kmxda(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_kmxda: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmxda a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call i32 @llvm.riscv.kmxda.i32.v2i16(<2 x i16> %0, <2 x i16> %1) + ret i32 %2 +} + +declare i32 @llvm.riscv.kmxda.i32.v2i16(<2 x i16>, <2 x i16>) + +define i32 @kmmac(i32 %t, i32 %a, i32 %b) { +; CHECK-LABEL: kmmac: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmac a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kmmac.i32(i32 %t, i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.kmmac.i32(i32, i32, i32) + +define i32 @kmmac_u(i32 %t, i32 %a, i32 %b) { +; CHECK-LABEL: kmmac_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmac.u a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kmmac.u.i32(i32 %t, i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.kmmac.u.i32(i32, i32, i32) + +define i32 @kmmawb(i32 %t, i32 %a, i32 %b) { +; CHECK-LABEL: kmmawb: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmawb a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kmmawb.i32.i32(i32 %t, i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.kmmawb.i32.i32(i32, i32, i32) + +define i32 @v_kmmawb(i32 %t, i32 %a, i32 %b.coerce) { +; CHECK-LABEL: v_kmmawb: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmawb a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %b.coerce to <2 x i16> + %1 = tail call i32 @llvm.riscv.kmmawb.i32.v2i16(i32 %t, i32 %a, <2 x i16> %0) + ret i32 %1 +} + +declare i32 @llvm.riscv.kmmawb.i32.v2i16(i32, i32, <2 x i16>) + +define i32 @kmmawb_u(i32 %t, i32 %a, i32 %b) { +; CHECK-LABEL: kmmawb_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmawb.u a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kmmawb.u.i32.i32(i32 %t, i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.kmmawb.u.i32.i32(i32, i32, i32) + +define i32 @v_kmmawb_u(i32 %t, i32 %a, i32 %b.coerce) { +; CHECK-LABEL: v_kmmawb_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmawb.u a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %b.coerce to <2 x i16> + %1 = tail call i32 @llvm.riscv.kmmawb.u.i32.v2i16(i32 %t, i32 %a, <2 x i16> %0) + ret i32 %1 +} + +declare i32 @llvm.riscv.kmmawb.u.i32.v2i16(i32, i32, <2 x i16>) + +define i32 @kmmawb2(i32 %t, i32 %a, i32 %b) { +; CHECK-LABEL: kmmawb2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmawb2 a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kmmawb2.i32.i32(i32 %t, i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.kmmawb2.i32.i32(i32, i32, i32) + +define i32 @v_kmmawb2(i32 %t, i32 %a, 
i32 %b.coerce) { +; CHECK-LABEL: v_kmmawb2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmawb2 a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %b.coerce to <2 x i16> + %1 = tail call i32 @llvm.riscv.kmmawb2.i32.v2i16(i32 %t, i32 %a, <2 x i16> %0) + ret i32 %1 +} + +declare i32 @llvm.riscv.kmmawb2.i32.v2i16(i32, i32, <2 x i16>) + +define i32 @kmmawb2_u(i32 %t, i32 %a, i32 %b) { +; CHECK-LABEL: kmmawb2_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmawb2.u a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kmmawb2.u.i32.i32(i32 %t, i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.kmmawb2.u.i32.i32(i32, i32, i32) + +define i32 @v_kmmawb2_u(i32 %t, i32 %a, i32 %b.coerce) { +; CHECK-LABEL: v_kmmawb2_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmawb2.u a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %b.coerce to <2 x i16> + %1 = tail call i32 @llvm.riscv.kmmawb2.u.i32.v2i16(i32 %t, i32 %a, <2 x i16> %0) + ret i32 %1 +} + +declare i32 @llvm.riscv.kmmawb2.u.i32.v2i16(i32, i32, <2 x i16>) + +define i32 @kmmawt(i32 %t, i32 %a, i32 %b) { +; CHECK-LABEL: kmmawt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmawt a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kmmawt.i32.i32(i32 %t, i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.kmmawt.i32.i32(i32, i32, i32) + +define i32 @v_kmmawt(i32 %t, i32 %a, i32 %b.coerce) { +; CHECK-LABEL: v_kmmawt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmawt a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %b.coerce to <2 x i16> + %1 = tail call i32 @llvm.riscv.kmmawt.i32.v2i16(i32 %t, i32 %a, <2 x i16> %0) + ret i32 %1 +} + +declare i32 @llvm.riscv.kmmawt.i32.v2i16(i32, i32, <2 x i16>) + +define i32 @kmmawt_u(i32 %t, i32 %a, i32 %b) { +; CHECK-LABEL: kmmawt_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmawt.u a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kmmawt.u.i32.i32(i32 %t, i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.kmmawt.u.i32.i32(i32, i32, i32) + +define i32 @v_kmmawt_u(i32 %t, i32 %a, i32 %b.coerce) { +; CHECK-LABEL: v_kmmawt_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmawt.u a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %b.coerce to <2 x i16> + %1 = tail call i32 @llvm.riscv.kmmawt.u.i32.v2i16(i32 %t, i32 %a, <2 x i16> %0) + ret i32 %1 +} + +declare i32 @llvm.riscv.kmmawt.u.i32.v2i16(i32, i32, <2 x i16>) + +define i32 @kmmawt2(i32 %t, i32 %a, i32 %b) { +; CHECK-LABEL: kmmawt2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmawt2 a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kmmawt2.i32.i32(i32 %t, i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.kmmawt2.i32.i32(i32, i32, i32) + +define i32 @v_kmmawt2(i32 %t, i32 %a, i32 %b.coerce) { +; CHECK-LABEL: v_kmmawt2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmawt2 a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %b.coerce to <2 x i16> + %1 = tail call i32 @llvm.riscv.kmmawt2.i32.v2i16(i32 %t, i32 %a, <2 x i16> %0) + ret i32 %1 +} + +declare i32 @llvm.riscv.kmmawt2.i32.v2i16(i32, i32, <2 x i16>) + +define i32 @kmmawt2_u(i32 %t, i32 %a, i32 %b) { +; CHECK-LABEL: kmmawt2_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmawt2.u a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kmmawt2.u.i32.i32(i32 %t, i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.kmmawt2.u.i32.i32(i32, i32, i32) + +define i32 @v_kmmawt2_u(i32 %t, i32 %a, i32 %b.coerce) { +; CHECK-LABEL: v_kmmawt2_u: +; CHECK: # %bb.0: # %entry +; 
CHECK-NEXT: kmmawt2.u a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %b.coerce to <2 x i16> + %1 = tail call i32 @llvm.riscv.kmmawt2.u.i32.v2i16(i32 %t, i32 %a, <2 x i16> %0) + ret i32 %1 +} + +declare i32 @llvm.riscv.kmmawt2.u.i32.v2i16(i32, i32, <2 x i16>) + +define i32 @kmmsb(i32 %t, i32 %a, i32 %b) { +; CHECK-LABEL: kmmsb: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmsb a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kmmsb.i32(i32 %t, i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.kmmsb.i32(i32, i32, i32) + +define i32 @kmmsb_u(i32 %t, i32 %a, i32 %b) { +; CHECK-LABEL: kmmsb_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmsb.u a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kmmsb.u.i32(i32 %t, i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.kmmsb.u.i32(i32, i32, i32) + +define i32 @kmmwb2(i32 %a, i32 %b) { +; CHECK-LABEL: kmmwb2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmwb2 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kmmwb2.i32.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.kmmwb2.i32.i32(i32, i32) + +define i32 @v_kmmwb2(i32 %a, i32 %b.coerce) { +; CHECK-LABEL: v_kmmwb2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmwb2 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %b.coerce to <2 x i16> + %1 = tail call i32 @llvm.riscv.kmmwb2.i32.v2i16(i32 %a, <2 x i16> %0) + ret i32 %1 +} + +declare i32 @llvm.riscv.kmmwb2.i32.v2i16(i32, <2 x i16>) + +define i32 @kmmwb2_u(i32 %a, i32 %b) { +; CHECK-LABEL: kmmwb2_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmwb2.u a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kmmwb2.u.i32.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.kmmwb2.u.i32.i32(i32, i32) + +define i32 @v_kmmwb2_u(i32 %a, i32 %b.coerce) { +; CHECK-LABEL: v_kmmwb2_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmwb2.u a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %b.coerce to <2 x i16> + %1 = tail call i32 @llvm.riscv.kmmwb2.u.i32.v2i16(i32 %a, <2 x i16> %0) + ret i32 %1 +} + +declare i32 @llvm.riscv.kmmwb2.u.i32.v2i16(i32, <2 x i16>) + +define i32 @kmmwt2(i32 %a, i32 %b) { +; CHECK-LABEL: kmmwt2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmwt2 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kmmwt2.i32.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.kmmwt2.i32.i32(i32, i32) + +define i32 @v_kmmwt2(i32 %a, i32 %b.coerce) { +; CHECK-LABEL: v_kmmwt2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmwt2 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %b.coerce to <2 x i16> + %1 = tail call i32 @llvm.riscv.kmmwt2.i32.v2i16(i32 %a, <2 x i16> %0) + ret i32 %1 +} + +declare i32 @llvm.riscv.kmmwt2.i32.v2i16(i32, <2 x i16>) + +define i32 @kmmwt2_u(i32 %a, i32 %b) { +; CHECK-LABEL: kmmwt2_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmwt2.u a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kmmwt2.u.i32.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.kmmwt2.u.i32.i32(i32, i32) + +define i32 @v_kmmwt2_u(i32 %a, i32 %b.coerce) { +; CHECK-LABEL: v_kmmwt2_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmwt2.u a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %b.coerce to <2 x i16> + %1 = tail call i32 @llvm.riscv.kmmwt2.u.i32.v2i16(i32 %a, <2 x i16> %0) + ret i32 %1 +} + +declare i32 @llvm.riscv.kmmwt2.u.i32.v2i16(i32, <2 x i16>) + +define i32 @kmsda(i32 %t, i32 %a, i32 %b) { +; CHECK-LABEL: kmsda: +; CHECK: # %bb.0: # %entry +; 
CHECK-NEXT: kmsda a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kmsda.i32.i32(i32 %t, i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.kmsda.i32.i32(i32, i32, i32) + +define i32 @k_kmsda(i32 %t, i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: k_kmsda: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmsda a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call i32 @llvm.riscv.kmsda.i32.v2i16(i32 %t, <2 x i16> %0, <2 x i16> %1) + ret i32 %2 +} + +declare i32 @llvm.riscv.kmsda.i32.v2i16(i32, <2 x i16>, <2 x i16>) + +define i32 @kmsxda(i32 %t, i32 %a, i32 %b) { +; CHECK-LABEL: kmsxda: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmsxda a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kmsxda.i32.i32(i32 %t, i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.kmsxda.i32.i32(i32, i32, i32) + +define i32 @k_kmsxda(i32 %t, i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: k_kmsxda: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmsxda a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call i32 @llvm.riscv.kmsxda.i32.v2i16(i32 %t, <2 x i16> %0, <2 x i16> %1) + ret i32 %2 +} + +declare i32 @llvm.riscv.kmsxda.i32.v2i16(i32, <2 x i16>, <2 x i16>) + +define i32 @ksllw(i32 %a, i32 %b) { +; CHECK-LABEL: ksllw: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ksllw a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.ksllw.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.ksllw.i32(i32, i32) + +define i32 @ksll8(i32 %a, i32 %b) { +; CHECK-LABEL: ksll8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ksll8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.ksll8.i32.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.ksll8.i32.i32(i32, i32) + +define i32 @v_ksll8(i32 %a.coerce, i32 %b) { +; CHECK-LABEL: v_ksll8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ksll8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = tail call <4 x i8> @llvm.riscv.ksll8.v4i8.i32(<4 x i8> %0, i32 %b) + %2 = bitcast <4 x i8> %1 to i32 + ret i32 %2 +} + +declare <4 x i8> @llvm.riscv.ksll8.v4i8.i32(<4 x i8>, i32) + +define i32 @ksll16(i32 %a, i32 %b) { +; CHECK-LABEL: ksll16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ksll16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.ksll16.i32.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.ksll16.i32.i32(i32, i32) + +define i32 @v_ksll16(i32 %a.coerce, i32 %b) { +; CHECK-LABEL: v_ksll16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ksll16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = tail call <2 x i16> @llvm.riscv.ksll16.v2i16.i32(<2 x i16> %0, i32 %b) + %2 = bitcast <2 x i16> %1 to i32 + ret i32 %2 +} + +declare <2 x i16> @llvm.riscv.ksll16.v2i16.i32(<2 x i16>, i32) + +define i32 @kslra8(i32 %a, i32 %b) { +; CHECK-LABEL: kslra8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kslra8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kslra8.i32.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.kslra8.i32.i32(i32, i32) + +define i32 @v_kslra8(i32 %a.coerce, i32 %b) { +; CHECK-LABEL: v_kslra8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kslra8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = tail call <4 x i8> @llvm.riscv.kslra8.v4i8.i32(<4 x i8> %0, i32 %b) + 
%2 = bitcast <4 x i8> %1 to i32
+ ret i32 %2
+}
+
+declare <4 x i8> @llvm.riscv.kslra8.v4i8.i32(<4 x i8>, i32)
+
+define i32 @kslra8_u(i32 %a, i32 %b) {
+; CHECK-LABEL: kslra8_u:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kslra8.u a0, a0, a1
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.riscv.kslra8.u.i32.i32(i32 %a, i32 %b)
+ ret i32 %0
+}
+
+declare i32 @llvm.riscv.kslra8.u.i32.i32(i32, i32)
+
+define i32 @v_kslra8_u(i32 %a.coerce, i32 %b) {
+; CHECK-LABEL: v_kslra8_u:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kslra8.u a0, a0, a1
+; CHECK-NEXT: ret
+entry:
+ %0 = bitcast i32 %a.coerce to <4 x i8>
+ %1 = tail call <4 x i8> @llvm.riscv.kslra8.u.v4i8.i32(<4 x i8> %0, i32 %b)
+ %2 = bitcast <4 x i8> %1 to i32
+ ret i32 %2
+}
+
+declare <4 x i8> @llvm.riscv.kslra8.u.v4i8.i32(<4 x i8>, i32)
+
+define i32 @kslra16(i32 %a, i32 %b) {
+; CHECK-LABEL: kslra16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kslra16 a0, a0, a1
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.riscv.kslra16.i32.i32(i32 %a, i32 %b)
+ ret i32 %0
+}
+
+declare i32 @llvm.riscv.kslra16.i32.i32(i32, i32)
+
+define i32 @v_kslra16(i32 %a.coerce, i32 %b) {
+; CHECK-LABEL: v_kslra16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kslra16 a0, a0, a1
+; CHECK-NEXT: ret
+entry:
+ %0 = bitcast i32 %a.coerce to <2 x i16>
+ %1 = tail call <2 x i16> @llvm.riscv.kslra16.v2i16.i32(<2 x i16> %0, i32 %b)
+ %2 = bitcast <2 x i16> %1 to i32
+ ret i32 %2
+}
+
+declare <2 x i16> @llvm.riscv.kslra16.v2i16.i32(<2 x i16>, i32)
+
+define i32 @kslra16_u(i32 %a, i32 %b) {
+; CHECK-LABEL: kslra16_u:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kslra16.u a0, a0, a1
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.riscv.kslra16.u.i32.i32(i32 %a, i32 %b)
+ ret i32 %0
+}
+
+declare i32 @llvm.riscv.kslra16.u.i32.i32(i32, i32)
+
+define i32 @v_kslra16_u(i32 %a.coerce, i32 %b) {
+; CHECK-LABEL: v_kslra16_u:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kslra16.u a0, a0, a1
+; CHECK-NEXT: ret
+entry:
+ %0 = bitcast i32 %a.coerce to <2 x i16>
+ %1 = tail call <2 x i16> @llvm.riscv.kslra16.u.v2i16.i32(<2 x i16> %0, i32 %b)
+ %2 = bitcast <2 x i16> %1 to i32
+ ret i32 %2
+}
+
+declare <2 x i16> @llvm.riscv.kslra16.u.v2i16.i32(<2 x i16>, i32)
+
+define i32 @kslraw(i32 %a, i32 %b) {
+; CHECK-LABEL: kslraw:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kslraw a0, a0, a1
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.riscv.kslraw.i32(i32 %a, i32 %b)
+ ret i32 %0
+}
+
+declare i32 @llvm.riscv.kslraw.i32(i32, i32)
+
+define i32 @kslraw_u(i32 %a, i32 %b) {
+; CHECK-LABEL: kslraw_u:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kslraw.u a0, a0, a1
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.riscv.kslraw.u.i32(i32 %a, i32 %b)
+ ret i32 %0
+}
+
+declare i32 @llvm.riscv.kslraw.u.i32(i32, i32)
+
+define i32 @kstas16(i32 %a, i32 %b) {
+; CHECK-LABEL: kstas16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kstas16 a0, a0, a1
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.riscv.kstas16.i32(i32 %a, i32 %b)
+ ret i32 %0
+}
+
+declare i32 @llvm.riscv.kstas16.i32(i32, i32)
+
+define i32 @v_kstas16(i32 %a.coerce, i32 %b.coerce) {
+; CHECK-LABEL: v_kstas16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kstas16 a0, a0, a1
+; CHECK-NEXT: ret
+entry:
+ %0 = bitcast i32 %a.coerce to <2 x i16>
+ %1 = bitcast i32 %b.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.riscv.kstas16.v2i16(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ ret i32 %3
+}
+
+declare <2 x i16> @llvm.riscv.kstas16.v2i16(<2 x i16>, <2 x i16>)
+
+define i32 @kstsa16(i32 %a, i32 %b) {
+; CHECK-LABEL: kstsa16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kstsa16 a0, a0, a1
+; CHECK-NEXT: ret
+entry:
+ %0 = tail
call i32 @llvm.riscv.kstsa16.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.kstsa16.i32(i32, i32) + +define i32 @v_kstsa16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_kstsa16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kstsa16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.kstsa16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +declare <2 x i16> @llvm.riscv.kstsa16.v2i16(<2 x i16>, <2 x i16>) + +define i32 @ksub8(i32 %a, i32 %b) { +; CHECK-LABEL: ksub8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ksub8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.ksub8.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.ksub8.i32(i32, i32) + +define i32 @v_ksub8(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_ksub8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ksub8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = bitcast i32 %b.coerce to <4 x i8> + %2 = tail call <4 x i8> @llvm.riscv.ksub8.v4i8(<4 x i8> %0, <4 x i8> %1) + %3 = bitcast <4 x i8> %2 to i32 + ret i32 %3 +} + +declare <4 x i8> @llvm.riscv.ksub8.v4i8(<4 x i8>, <4 x i8>) + +define i32 @ksub16(i32 %a, i32 %b) { +; CHECK-LABEL: ksub16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ksub16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.ksub16.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.ksub16.i32(i32, i32) + +define i32 @v_ksub16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_ksub16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ksub16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.ksub16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +declare <2 x i16> @llvm.riscv.ksub16.v2i16(<2 x i16>, <2 x i16>) + +define i32 @ksubh(i32 %a, i32 %b) { +; CHECK-LABEL: ksubh: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ksubh a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.ksubh.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.ksubh.i32(i32, i32) + +define i32 @ksubw(i32 %a, i32 %b) { +; CHECK-LABEL: ksubw: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ksubw a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.ksubw.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.ksubw.i32(i32, i32) + +define i32 @kwmmul(i32 %a, i32 %b) { +; CHECK-LABEL: kwmmul: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kwmmul a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kwmmul.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.kwmmul.i32(i32, i32) + +define i32 @kwmmul_u(i32 %a, i32 %b) { +; CHECK-LABEL: kwmmul_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kwmmul.u a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.kwmmul.u.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.kwmmul.u.i32(i32, i32) + +define i32 @maxw(i32 %a, i32 %b) { +; CHECK-LABEL: maxw: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: maxw a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.maxw.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.maxw.i32(i32, i32) + +define i32 @minw(i32 %a, i32 %b) { +; CHECK-LABEL: minw: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: minw a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 
@llvm.riscv.minw.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.minw.i32(i32, i32) + +define i32 @pbsad(i32 %a, i32 %b) { +; CHECK-LABEL: pbsad: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pbsad a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.pbsad.i32.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.pbsad.i32.i32(i32, i32) + +define i32 @v_pbsad(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_pbsad: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pbsad a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = bitcast i32 %b.coerce to <4 x i8> + %2 = tail call i32 @llvm.riscv.pbsad.i32.v4i8(<4 x i8> %0, <4 x i8> %1) + ret i32 %2 +} + +declare i32 @llvm.riscv.pbsad.i32.v4i8(<4 x i8>, <4 x i8>) + +define i32 @pbsada(i32 %t, i32 %a, i32 %b) { +; CHECK-LABEL: pbsada: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pbsada a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.pbsada.i32.i32(i32 %t, i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.pbsada.i32.i32(i32, i32, i32) + +define i32 @v_pbsada(i32 %t, i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_pbsada: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pbsada a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = bitcast i32 %b.coerce to <4 x i8> + %2 = tail call i32 @llvm.riscv.pbsada.i32.v4i8(i32 %t, <4 x i8> %0, <4 x i8> %1) + ret i32 %2 +} + +declare i32 @llvm.riscv.pbsada.i32.v4i8(i32, <4 x i8>, <4 x i8>) + +define i32 @pkbb16(i32 %a, i32 %b) { +; CHECK-LABEL: pkbb16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pkbb16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.pkbb16.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.pkbb16.i32(i32, i32) + +define i32 @v_pkbb16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_pkbb16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pkbb16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.pkbb16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +declare <2 x i16> @llvm.riscv.pkbb16.v2i16(<2 x i16>, <2 x i16>) + +define i32 @pkbt16(i32 %a, i32 %b) { +; CHECK-LABEL: pkbt16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pkbt16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.pkbt16.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.pkbt16.i32(i32, i32) + +define i32 @v_pkbt16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_pkbt16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pkbt16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.pkbt16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +declare <2 x i16> @llvm.riscv.pkbt16.v2i16(<2 x i16>, <2 x i16>) + +define i32 @pktb16(i32 %a, i32 %b) { +; CHECK-LABEL: pktb16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pktb16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.pktb16.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.pktb16.i32(i32, i32) + +define i32 @v_pktb16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_pktb16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pktb16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.pktb16.v2i16(<2 
x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +declare <2 x i16> @llvm.riscv.pktb16.v2i16(<2 x i16>, <2 x i16>) + +define i32 @pktt16(i32 %a, i32 %b) { +; CHECK-LABEL: pktt16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pktt16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.pktt16.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.pktt16.i32(i32, i32) + +define i32 @v_pktt16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_pktt16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pktt16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.pktt16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +declare <2 x i16> @llvm.riscv.pktt16.v2i16(<2 x i16>, <2 x i16>) + +define i32 @radd8(i32 %a, i32 %b) { +; CHECK-LABEL: radd8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: radd8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.radd8.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.radd8.i32(i32, i32) + +define i32 @v_radd8(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_radd8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: radd8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = bitcast i32 %b.coerce to <4 x i8> + %2 = tail call <4 x i8> @llvm.riscv.radd8.v4i8(<4 x i8> %0, <4 x i8> %1) + %3 = bitcast <4 x i8> %2 to i32 + ret i32 %3 +} + +declare <4 x i8> @llvm.riscv.radd8.v4i8(<4 x i8>, <4 x i8>) + +define i32 @radd16(i32 %a, i32 %b) { +; CHECK-LABEL: radd16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: radd16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.radd16.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.radd16.i32(i32, i32) + +define i32 @v_radd16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_radd16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: radd16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.radd16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +declare <2 x i16> @llvm.riscv.radd16.v2i16(<2 x i16>, <2 x i16>) + +define i32 @raddw(i32 %a, i32 %b) { +; CHECK-LABEL: raddw: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: raddw a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.raddw.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.raddw.i32(i32, i32) + +define i32 @rcras16(i32 %a, i32 %b) { +; CHECK-LABEL: rcras16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: rcras16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.rcras16.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.rcras16.i32(i32, i32) + +define i32 @v_rcras16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_rcras16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: rcras16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.rcras16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +declare <2 x i16> @llvm.riscv.rcras16.v2i16(<2 x i16>, <2 x i16>) + +define i32 @rcrsa16(i32 %a, i32 %b) { +; CHECK-LABEL: rcrsa16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: rcrsa16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.rcrsa16.i32(i32 %a, i32 %b) + ret i32 %0 +} + 
+declare i32 @llvm.riscv.rcrsa16.i32(i32, i32) + +define i32 @v_rcrsa16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_rcrsa16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: rcrsa16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.rcrsa16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +declare <2 x i16> @llvm.riscv.rcrsa16.v2i16(<2 x i16>, <2 x i16>) + +define i32 @rstas16(i32 %a, i32 %b) { +; CHECK-LABEL: rstas16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: rcras16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.rstas16.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.rstas16.i32(i32, i32) + +define i32 @v_rstas16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_rstas16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: rcras16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.rstas16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +declare <2 x i16> @llvm.riscv.rstas16.v2i16(<2 x i16>, <2 x i16>) + +define i32 @rstsa16(i32 %a, i32 %b) { +; CHECK-LABEL: rstsa16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: rcrsa16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.rstsa16.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.rstsa16.i32(i32, i32) + +define i32 @v_rstsa16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_rstsa16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: rcrsa16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.rstsa16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +declare <2 x i16> @llvm.riscv.rstsa16.v2i16(<2 x i16>, <2 x i16>) + +define i32 @rsub8(i32 %a, i32 %b) { +; CHECK-LABEL: rsub8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: rsub8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.rsub8.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.rsub8.i32(i32, i32) + +define i32 @v_rsub8(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_rsub8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: rsub8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = bitcast i32 %b.coerce to <4 x i8> + %2 = tail call <4 x i8> @llvm.riscv.rsub8.v4i8(<4 x i8> %0, <4 x i8> %1) + %3 = bitcast <4 x i8> %2 to i32 + ret i32 %3 +} + +declare <4 x i8> @llvm.riscv.rsub8.v4i8(<4 x i8>, <4 x i8>) + +define i32 @rsub16(i32 %a, i32 %b) { +; CHECK-LABEL: rsub16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: rsub16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.rsub16.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.rsub16.i32(i32, i32) + +define i32 @v_rsub16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_rsub16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: rsub16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.rsub16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +declare <2 x i16> @llvm.riscv.rsub16.v2i16(<2 x i16>, <2 x i16>) + +define i32 @rsubw(i32 %a, i32 %b) { +; CHECK-LABEL: rsubw: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: rsubw a0, a0, a1 +; 
CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.riscv.rsubw.i32(i32 %a, i32 %b)
+ ret i32 %0
+}
+
+declare i32 @llvm.riscv.rsubw.i32(i32, i32)
+
+define i32 @sclip8(i32 %a) {
+; CHECK-LABEL: sclip8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sclip8 a0, a0, 5
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.riscv.sclip8.i32.i32(i32 %a, i32 5)
+ ret i32 %0
+}
+
+declare i32 @llvm.riscv.sclip8.i32.i32(i32, i32)
+
+define i32 @v_sclip8(i32 %a.coerce) {
+; CHECK-LABEL: v_sclip8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sclip8 a0, a0, 5
+; CHECK-NEXT: ret
+entry:
+ %0 = bitcast i32 %a.coerce to <4 x i8>
+ %1 = tail call <4 x i8> @llvm.riscv.sclip8.v4i8.i32(<4 x i8> %0, i32 5)
+ %2 = bitcast <4 x i8> %1 to i32
+ ret i32 %2
+}
+
+declare <4 x i8> @llvm.riscv.sclip8.v4i8.i32(<4 x i8>, i32)
+
+define i32 @sclip16(i32 %a) {
+; CHECK-LABEL: sclip16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sclip16 a0, a0, 6
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.riscv.sclip16.i32.i32(i32 %a, i32 6)
+ ret i32 %0
+}
+
+declare i32 @llvm.riscv.sclip16.i32.i32(i32, i32)
+
+define i32 @v_sclip16(i32 %a.coerce) {
+; CHECK-LABEL: v_sclip16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sclip16 a0, a0, 6
+; CHECK-NEXT: ret
+entry:
+ %0 = bitcast i32 %a.coerce to <2 x i16>
+ %1 = tail call <2 x i16> @llvm.riscv.sclip16.v2i16.i32(<2 x i16> %0, i32 6)
+ %2 = bitcast <2 x i16> %1 to i32
+ ret i32 %2
+}
+
+declare <2 x i16> @llvm.riscv.sclip16.v2i16.i32(<2 x i16>, i32)
+
+define i32 @sclip32(i32 %a) {
+; CHECK-LABEL: sclip32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sclip32 a0, a0, 7
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.riscv.sclip32.i32.i32(i32 %a, i32 7)
+ ret i32 %0
+}
+
+declare i32 @llvm.riscv.sclip32.i32.i32(i32, i32)
+
+define i32 @scmple8(i32 %a, i32 %b) {
+; CHECK-LABEL: scmple8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: scmple8 a0, a0, a1
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.riscv.scmple8.i32(i32 %a, i32 %b)
+ ret i32 %0
+}
+
+declare i32 @llvm.riscv.scmple8.i32(i32, i32)
+
+define i32 @v_scmple8(i32 %a.coerce, i32 %b.coerce) {
+; CHECK-LABEL: v_scmple8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: scmple8 a0, a0, a1
+; CHECK-NEXT: ret
+entry:
+ %0 = bitcast i32 %a.coerce to <4 x i8>
+ %1 = bitcast i32 %b.coerce to <4 x i8>
+ %2 = tail call <4 x i8> @llvm.riscv.scmple8.v4i8(<4 x i8> %0, <4 x i8> %1)
+ %3 = bitcast <4 x i8> %2 to i32
+ ret i32 %3
+}
+
+declare <4 x i8> @llvm.riscv.scmple8.v4i8(<4 x i8>, <4 x i8>)
+
+define i32 @scmple16(i32 %a, i32 %b) {
+; CHECK-LABEL: scmple16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: scmple16 a0, a0, a1
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.riscv.scmple16.i32(i32 %a, i32 %b)
+ ret i32 %0
+}
+
+declare i32 @llvm.riscv.scmple16.i32(i32, i32)
+
+define i32 @v_scmple16(i32 %a.coerce, i32 %b.coerce) {
+; CHECK-LABEL: v_scmple16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: scmple16 a0, a0, a1
+; CHECK-NEXT: ret
+entry:
+ %0 = bitcast i32 %a.coerce to <2 x i16>
+ %1 = bitcast i32 %b.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.riscv.scmple16.v2i16(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ ret i32 %3
+}
+
+declare <2 x i16> @llvm.riscv.scmple16.v2i16(<2 x i16>, <2 x i16>)
+
+define i32 @scmplt8(i32 %a, i32 %b) {
+; CHECK-LABEL: scmplt8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: scmplt8 a0, a0, a1
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.riscv.scmplt8.i32(i32 %a, i32 %b)
+ ret i32 %0
+}
+
+declare i32 @llvm.riscv.scmplt8.i32(i32, i32)
+
+define i32 @v_scmplt8(i32 %a.coerce,
i32 %b.coerce) { +; CHECK-LABEL: v_scmplt8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: scmplt8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = bitcast i32 %b.coerce to <4 x i8> + %2 = tail call <4 x i8> @llvm.riscv.scmplt8.v4i8(<4 x i8> %0, <4 x i8> %1) + %3 = bitcast <4 x i8> %2 to i32 + ret i32 %3 +} + +declare <4 x i8> @llvm.riscv.scmplt8.v4i8(<4 x i8>, <4 x i8>) + +define i32 @scmplt16(i32 %a, i32 %b) { +; CHECK-LABEL: scmplt16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: scmplt16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.scmplt16.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.scmplt16.i32(i32, i32) + +define i32 @v_scmplt16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_scmplt16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: scmplt16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.scmplt16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +declare <2 x i16> @llvm.riscv.scmplt16.v2i16(<2 x i16>, <2 x i16>) + +define i32 @sll8(i32 %a, i32 %b) { +; CHECK-LABEL: sll8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sll8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.sll8.i32.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.sll8.i32.i32(i32, i32) + +define i32 @v_sll8(i32 %a.coerce, i32 %b) { +; CHECK-LABEL: v_sll8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sll8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = tail call <4 x i8> @llvm.riscv.sll8.v4i8.i32(<4 x i8> %0, i32 %b) + %2 = bitcast <4 x i8> %1 to i32 + ret i32 %2 +} + +declare <4 x i8> @llvm.riscv.sll8.v4i8.i32(<4 x i8>, i32) + +define i32 @sll16(i32 %a, i32 %b) { +; CHECK-LABEL: sll16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sll16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.sll16.i32.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.sll16.i32.i32(i32, i32) + +define i32 @v_sll16(i32 %a.coerce, i32 %b) { +; CHECK-LABEL: v_sll16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sll16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = tail call <2 x i16> @llvm.riscv.sll16.v2i16.i32(<2 x i16> %0, i32 %b) + %2 = bitcast <2 x i16> %1 to i32 + ret i32 %2 +} + +declare <2 x i16> @llvm.riscv.sll16.v2i16.i32(<2 x i16>, i32) + +define i32 @smaqa(i32 %t, i32 %a, i32 %b) { +; CHECK-LABEL: smaqa: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smaqa a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.smaqa.i32.i32(i32 %t, i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.smaqa.i32.i32(i32, i32, i32) + +define i32 @v_smaqa(i32 %t, i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_smaqa: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smaqa a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = bitcast i32 %b.coerce to <4 x i8> + %2 = tail call i32 @llvm.riscv.smaqa.i32.v4i8(i32 %t, <4 x i8> %0, <4 x i8> %1) + ret i32 %2 +} + +declare i32 @llvm.riscv.smaqa.i32.v4i8(i32, <4 x i8>, <4 x i8>) + +define i32 @smaqa_su(i32 %t, i32 %a, i32 %b) { +; CHECK-LABEL: smaqa_su: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smaqa.su a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.smaqa.su.i32.i32(i32 %t, i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.smaqa.su.i32.i32(i32, i32, i32) + +define i32 @v_smaqa_su(i32 
%t, i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_smaqa_su: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smaqa.su a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = bitcast i32 %b.coerce to <4 x i8> + %2 = tail call i32 @llvm.riscv.smaqa.su.i32.v4i8(i32 %t, <4 x i8> %0, <4 x i8> %1) + ret i32 %2 +} + +declare i32 @llvm.riscv.smaqa.su.i32.v4i8(i32, <4 x i8>, <4 x i8>) + +define i32 @smax8(i32 %a, i32 %b) { +; CHECK-LABEL: smax8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smax8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.smax8.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.smax8.i32(i32, i32) + +define i32 @v_smax8(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_smax8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smax8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = bitcast i32 %b.coerce to <4 x i8> + %2 = tail call <4 x i8> @llvm.riscv.smax8.v4i8(<4 x i8> %0, <4 x i8> %1) + %3 = bitcast <4 x i8> %2 to i32 + ret i32 %3 +} + +declare <4 x i8> @llvm.riscv.smax8.v4i8(<4 x i8>, <4 x i8>) + +define i32 @smax16(i32 %a, i32 %b) { +; CHECK-LABEL: smax16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smax16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.smax16.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.smax16.i32(i32, i32) + +define i32 @v_smax16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_smax16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smax16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.smax16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +declare <2 x i16> @llvm.riscv.smax16.v2i16(<2 x i16>, <2 x i16>) + +define i32 @smbb16(i32 %a, i32 %b) { +; CHECK-LABEL: smbb16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smbb16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.smbb16.i32.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.smbb16.i32.i32(i32, i32) + +define i32 @v_smbb16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_smbb16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smbb16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call i32 @llvm.riscv.smbb16.i32.v2i16(<2 x i16> %0, <2 x i16> %1) + ret i32 %2 +} + +declare i32 @llvm.riscv.smbb16.i32.v2i16(<2 x i16>, <2 x i16>) + +define i32 @smbt16(i32 %a, i32 %b) { +; CHECK-LABEL: smbt16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smbt16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.smbt16.i32.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.smbt16.i32.i32(i32, i32) + +define i32 @v_smbt16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_smbt16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smbt16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call i32 @llvm.riscv.smbt16.i32.v2i16(<2 x i16> %0, <2 x i16> %1) + ret i32 %2 +} + +declare i32 @llvm.riscv.smbt16.i32.v2i16(<2 x i16>, <2 x i16>) + +define i32 @smtt16(i32 %a, i32 %b) { +; CHECK-LABEL: smtt16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smtt16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.smtt16.i32.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.smtt16.i32.i32(i32, i32) + +define i32 @v_smtt16(i32 
%a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_smtt16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smtt16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call i32 @llvm.riscv.smtt16.i32.v2i16(<2 x i16> %0, <2 x i16> %1) + ret i32 %2 +} + +declare i32 @llvm.riscv.smtt16.i32.v2i16(<2 x i16>, <2 x i16>) + +define i32 @smds(i32 %a, i32 %b) { +; CHECK-LABEL: smds: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smds a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.smds.i32.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.smds.i32.i32(i32, i32) + +define i32 @v_smds(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_smds: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smds a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call i32 @llvm.riscv.smds.i32.v2i16(<2 x i16> %0, <2 x i16> %1) + ret i32 %2 +} + +declare i32 @llvm.riscv.smds.i32.v2i16(<2 x i16>, <2 x i16>) + +define i32 @smdrs(i32 %a, i32 %b) { +; CHECK-LABEL: smdrs: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smdrs a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.smdrs.i32.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.smdrs.i32.i32(i32, i32) + +define i32 @v_smdrs(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_smdrs: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smdrs a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call i32 @llvm.riscv.smdrs.i32.v2i16(<2 x i16> %0, <2 x i16> %1) + ret i32 %2 +} + +declare i32 @llvm.riscv.smdrs.i32.v2i16(<2 x i16>, <2 x i16>) + +define i32 @smxds(i32 %a, i32 %b) { +; CHECK-LABEL: smxds: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smxds a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.smxds.i32.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.smxds.i32.i32(i32, i32) + +define i32 @v_smxds(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_smxds: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smxds a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call i32 @llvm.riscv.smxds.i32.v2i16(<2 x i16> %0, <2 x i16> %1) + ret i32 %2 +} + +declare i32 @llvm.riscv.smxds.i32.v2i16(<2 x i16>, <2 x i16>) + +define i32 @smin8(i32 %a, i32 %b) { +; CHECK-LABEL: smin8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smin8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.smin8.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.smin8.i32(i32, i32) + +define i32 @v_smin8(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_smin8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smin8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = bitcast i32 %b.coerce to <4 x i8> + %2 = tail call <4 x i8> @llvm.riscv.smin8.v4i8(<4 x i8> %0, <4 x i8> %1) + %3 = bitcast <4 x i8> %2 to i32 + ret i32 %3 +} + +declare <4 x i8> @llvm.riscv.smin8.v4i8(<4 x i8>, <4 x i8>) + +define i32 @smin16(i32 %a, i32 %b) { +; CHECK-LABEL: smin16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smin16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.smin16.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.smin16.i32(i32, i32) + +define i32 @v_smin16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_smin16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: 
smin16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.smin16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +declare <2 x i16> @llvm.riscv.smin16.v2i16(<2 x i16>, <2 x i16>) + +define i32 @smmul(i32 %a, i32 %b) { +; CHECK-LABEL: smmul: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smmul a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.smmul.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.smmul.i32(i32, i32) + +define i32 @smmul_u(i32 %a, i32 %b) { +; CHECK-LABEL: smmul_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smmul.u a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.smmul.u.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.smmul.u.i32(i32, i32) + +define i32 @smmwb(i32 %a, i32 %b) { +; CHECK-LABEL: smmwb: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smmwb a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.smmwb.i32.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.smmwb.i32.i32(i32, i32) + +define i32 @v_smmwb(i32 %a, i32 %b.coerce) { +; CHECK-LABEL: v_smmwb: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smmwb a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %b.coerce to <2 x i16> + %1 = tail call i32 @llvm.riscv.smmwb.i32.v2i16(i32 %a, <2 x i16> %0) + ret i32 %1 +} + +declare i32 @llvm.riscv.smmwb.i32.v2i16(i32, <2 x i16>) + +define i32 @smmwb_u(i32 %a, i32 %b) { +; CHECK-LABEL: smmwb_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smmwb.u a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.smmwb.u.i32.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.smmwb.u.i32.i32(i32, i32) + +define i32 @v_smmwb_u(i32 %a, i32 %b.coerce) { +; CHECK-LABEL: v_smmwb_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smmwb.u a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %b.coerce to <2 x i16> + %1 = tail call i32 @llvm.riscv.smmwb.u.i32.v2i16(i32 %a, <2 x i16> %0) + ret i32 %1 +} + +declare i32 @llvm.riscv.smmwb.u.i32.v2i16(i32, <2 x i16>) + +define i32 @smmwt(i32 %a, i32 %b) { +; CHECK-LABEL: smmwt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smmwt a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.smmwt.i32.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.smmwt.i32.i32(i32, i32) + +define i32 @v_smmwt(i32 %a, i32 %b.coerce) { +; CHECK-LABEL: v_smmwt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smmwt a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %b.coerce to <2 x i16> + %1 = tail call i32 @llvm.riscv.smmwt.i32.v2i16(i32 %a, <2 x i16> %0) + ret i32 %1 +} + +declare i32 @llvm.riscv.smmwt.i32.v2i16(i32, <2 x i16>) + +define i32 @smmwt_u(i32 %a, i32 %b) { +; CHECK-LABEL: smmwt_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smmwt.u a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.smmwt.u.i32.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.smmwt.u.i32.i32(i32, i32) + +define i32 @v_smmwt_u(i32 %a, i32 %b.coerce) { +; CHECK-LABEL: v_smmwt_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smmwt.u a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %b.coerce to <2 x i16> + %1 = tail call i32 @llvm.riscv.smmwt.u.i32.v2i16(i32 %a, <2 x i16> %0) + ret i32 %1 +} + +declare i32 @llvm.riscv.smmwt.u.i32.v2i16(i32, <2 x i16>) + +define i32 @sra_u(i32 %a, i32 %b) { +; CHECK-LABEL: sra_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sra.u a0, a0, a1 +; 
CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.sra.u.i32.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.sra.u.i32.i32(i32, i32) + +define i32 @sra8(i32 %a, i32 %b) { +; CHECK-LABEL: sra8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sra8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.sra8.i32.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.sra8.i32.i32(i32, i32) + +define i32 @v_sra8(i32 %a.coerce, i32 %b) { +; CHECK-LABEL: v_sra8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sra8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = tail call <4 x i8> @llvm.riscv.sra8.v4i8.i32(<4 x i8> %0, i32 %b) + %2 = bitcast <4 x i8> %1 to i32 + ret i32 %2 +} + +declare <4 x i8> @llvm.riscv.sra8.v4i8.i32(<4 x i8>, i32) + +define i32 @sra8_u(i32 %a, i32 %b) { +; CHECK-LABEL: sra8_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sra8.u a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.sra8.u.i32.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.sra8.u.i32.i32(i32, i32) + +define i32 @v_sra8_u(i32 %a.coerce, i32 %b) { +; CHECK-LABEL: v_sra8_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sra8.u a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = tail call <4 x i8> @llvm.riscv.sra8.u.v4i8.i32(<4 x i8> %0, i32 %b) + %2 = bitcast <4 x i8> %1 to i32 + ret i32 %2 +} + +declare <4 x i8> @llvm.riscv.sra8.u.v4i8.i32(<4 x i8>, i32) + +define i32 @sra16(i32 %a, i32 %b) { +; CHECK-LABEL: sra16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sra16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.sra16.i32.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.sra16.i32.i32(i32, i32) + +define i32 @v_sra16(i32 %a.coerce, i32 %b) { +; CHECK-LABEL: v_sra16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sra16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = tail call <2 x i16> @llvm.riscv.sra16.v2i16.i32(<2 x i16> %0, i32 %b) + %2 = bitcast <2 x i16> %1 to i32 + ret i32 %2 +} + +declare <2 x i16> @llvm.riscv.sra16.v2i16.i32(<2 x i16>, i32) + +define i32 @sra16_u(i32 %a, i32 %b) { +; CHECK-LABEL: sra16_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sra16.u a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.sra16.u.i32.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.sra16.u.i32.i32(i32, i32) + +define i32 @v_sra16_u(i32 %a.coerce, i32 %b) { +; CHECK-LABEL: v_sra16_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sra16.u a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = tail call <2 x i16> @llvm.riscv.sra16.u.v2i16.i32(<2 x i16> %0, i32 %b) + %2 = bitcast <2 x i16> %1 to i32 + ret i32 %2 +} + +declare <2 x i16> @llvm.riscv.sra16.u.v2i16.i32(<2 x i16>, i32) + +define i32 @srl8(i32 %a, i32 %b) { +; CHECK-LABEL: srl8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: srl8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.srl8.i32.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.srl8.i32.i32(i32, i32) + +define i32 @v_srl8(i32 %a.coerce, i32 %b) { +; CHECK-LABEL: v_srl8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: srl8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = tail call <4 x i8> @llvm.riscv.srl8.v4i8.i32(<4 x i8> %0, i32 %b) + %2 = bitcast <4 x i8> %1 to i32 + ret i32 %2 +} + +declare <4 x i8> @llvm.riscv.srl8.v4i8.i32(<4 x i8>, i32) + +define i32 @srl8_u(i32 %a, i32 %b) { +; 
CHECK-LABEL: srl8_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: srl8.u a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.srl8.u.i32.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.srl8.u.i32.i32(i32, i32) + +define i32 @v_srl8_u(i32 %a.coerce, i32 %b) { +; CHECK-LABEL: v_srl8_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: srl8.u a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = tail call <4 x i8> @llvm.riscv.srl8.u.v4i8.i32(<4 x i8> %0, i32 %b) + %2 = bitcast <4 x i8> %1 to i32 + ret i32 %2 +} + +declare <4 x i8> @llvm.riscv.srl8.u.v4i8.i32(<4 x i8>, i32) + +define i32 @srl16(i32 %a, i32 %b) { +; CHECK-LABEL: srl16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: srl16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.srl16.i32.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.srl16.i32.i32(i32, i32) + +define i32 @v_srl16(i32 %a.coerce, i32 %b) { +; CHECK-LABEL: v_srl16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: srl16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = tail call <2 x i16> @llvm.riscv.srl16.v2i16.i32(<2 x i16> %0, i32 %b) + %2 = bitcast <2 x i16> %1 to i32 + ret i32 %2 +} + +declare <2 x i16> @llvm.riscv.srl16.v2i16.i32(<2 x i16>, i32) + +define i32 @srl16_u(i32 %a, i32 %b) { +; CHECK-LABEL: srl16_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: srl16.u a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.srl16.u.i32.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.srl16.u.i32.i32(i32, i32) + +define i32 @v_srl16_u(i32 %a.coerce, i32 %b) { +; CHECK-LABEL: v_srl16_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: srl16.u a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = tail call <2 x i16> @llvm.riscv.srl16.u.v2i16.i32(<2 x i16> %0, i32 %b) + %2 = bitcast <2 x i16> %1 to i32 + ret i32 %2 +} + +declare <2 x i16> @llvm.riscv.srl16.u.v2i16.i32(<2 x i16>, i32) + +define i32 @stas16(i32 %a, i32 %b) { +; CHECK-LABEL: stas16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: stas16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.stas16.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.stas16.i32(i32, i32) + +define i32 @v_ustas16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_ustas16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: stas16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.stas16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +declare <2 x i16> @llvm.riscv.stas16.v2i16(<2 x i16>, <2 x i16>) + +define i32 @v_sstas16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_sstas16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: stas16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.stas16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +define i32 @stsa16(i32 %a, i32 %b) { +; CHECK-LABEL: stsa16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: stsa16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.stsa16.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.stsa16.i32(i32, i32) + +define i32 @v_ustsa16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_ustsa16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: stsa16 a0, a0, a1 +; CHECK-NEXT: 
ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.stsa16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +declare <2 x i16> @llvm.riscv.stsa16.v2i16(<2 x i16>, <2 x i16>) + +define i32 @v_sstsa16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_sstsa16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: stsa16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.stsa16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +define i32 @sub8(i32 %a, i32 %b) { +; CHECK-LABEL: sub8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sub8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.sub8.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.sub8.i32(i32, i32) + +define i32 @v_usub8(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_usub8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sub8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = bitcast i32 %b.coerce to <4 x i8> + %2 = tail call <4 x i8> @llvm.riscv.sub8.v4i8(<4 x i8> %0, <4 x i8> %1) + %3 = bitcast <4 x i8> %2 to i32 + ret i32 %3 +} + +declare <4 x i8> @llvm.riscv.sub8.v4i8(<4 x i8>, <4 x i8>) + +define i32 @v_ssub8(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_ssub8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sub8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = bitcast i32 %b.coerce to <4 x i8> + %2 = tail call <4 x i8> @llvm.riscv.sub8.v4i8(<4 x i8> %0, <4 x i8> %1) + %3 = bitcast <4 x i8> %2 to i32 + ret i32 %3 +} + +define i32 @sub16(i32 %a, i32 %b) { +; CHECK-LABEL: sub16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sub16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.sub16.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.sub16.i32(i32, i32) + +define i32 @v_usub16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_usub16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sub16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.sub16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +declare <2 x i16> @llvm.riscv.sub16.v2i16(<2 x i16>, <2 x i16>) + +define i32 @v_ssub16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_ssub16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sub16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.sub16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +define i32 @sunpkd810(i32 %a) { +; CHECK-LABEL: sunpkd810: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sunpkd810 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.sunpkd810.i32(i32 %a) + ret i32 %0 +} + +declare i32 @llvm.riscv.sunpkd810.i32(i32) + +define i32 @v_sunpkd810(i32 %a.coerce) { +; CHECK-LABEL: v_sunpkd810: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sunpkd810 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = tail call <2 x i16> @llvm.riscv.v.sunpkd810.v2i16(<4 x i8> %0) + %2 = bitcast <2 x i16> %1 to i32 + ret i32 %2 +} + +declare <2 x i16> @llvm.riscv.v.sunpkd810.v2i16(<4 x i8>) + +define i32 @sunpkd820(i32 %a) { +; 
CHECK-LABEL: sunpkd820: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sunpkd820 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.sunpkd820.i32(i32 %a) + ret i32 %0 +} + +declare i32 @llvm.riscv.sunpkd820.i32(i32) + +define i32 @v_sunpkd820(i32 %a.coerce) { +; CHECK-LABEL: v_sunpkd820: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sunpkd820 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = tail call <2 x i16> @llvm.riscv.v.sunpkd820.v2i16(<4 x i8> %0) + %2 = bitcast <2 x i16> %1 to i32 + ret i32 %2 +} + +declare <2 x i16> @llvm.riscv.v.sunpkd820.v2i16(<4 x i8>) + +define i32 @sunpkd830(i32 %a) { +; CHECK-LABEL: sunpkd830: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sunpkd830 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.sunpkd830.i32(i32 %a) + ret i32 %0 +} + +declare i32 @llvm.riscv.sunpkd830.i32(i32) + +define i32 @v_sunpkd830(i32 %a.coerce) { +; CHECK-LABEL: v_sunpkd830: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sunpkd830 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = tail call <2 x i16> @llvm.riscv.v.sunpkd830.v2i16(<4 x i8> %0) + %2 = bitcast <2 x i16> %1 to i32 + ret i32 %2 +} + +declare <2 x i16> @llvm.riscv.v.sunpkd830.v2i16(<4 x i8>) + +define i32 @sunpkd831(i32 %a) { +; CHECK-LABEL: sunpkd831: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sunpkd831 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.sunpkd831.i32(i32 %a) + ret i32 %0 +} + +declare i32 @llvm.riscv.sunpkd831.i32(i32) + +define i32 @v_sunpkd831(i32 %a.coerce) { +; CHECK-LABEL: v_sunpkd831: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sunpkd831 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = tail call <2 x i16> @llvm.riscv.v.sunpkd831.v2i16(<4 x i8> %0) + %2 = bitcast <2 x i16> %1 to i32 + ret i32 %2 +} + +declare <2 x i16> @llvm.riscv.v.sunpkd831.v2i16(<4 x i8>) + +define i32 @sunpkd832(i32 %a) { +; CHECK-LABEL: sunpkd832: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sunpkd832 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.sunpkd832.i32(i32 %a) + ret i32 %0 +} + +declare i32 @llvm.riscv.sunpkd832.i32(i32) + +define i32 @v_sunpkd832(i32 %a.coerce) { +; CHECK-LABEL: v_sunpkd832: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sunpkd832 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = tail call <2 x i16> @llvm.riscv.v.sunpkd832.v2i16(<4 x i8> %0) + %2 = bitcast <2 x i16> %1 to i32 + ret i32 %2 +} + +declare <2 x i16> @llvm.riscv.v.sunpkd832.v2i16(<4 x i8>) + +define i32 @swap8(i32 %a) { +; CHECK-LABEL: swap8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: swap8 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.swap8.i32(i32 %a) + ret i32 %0 +} + +declare i32 @llvm.riscv.swap8.i32(i32) + +define i32 @v_swap8(i32 %a.coerce) { +; CHECK-LABEL: v_swap8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: swap8 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = tail call <4 x i8> @llvm.riscv.swap8.v4i8(<4 x i8> %0) + %2 = bitcast <4 x i8> %1 to i32 + ret i32 %2 +} + +declare <4 x i8> @llvm.riscv.swap8.v4i8(<4 x i8>) + +define i32 @swap16(i32 %a) { +; CHECK-LABEL: swap16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: swap16 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.swap16.i32(i32 %a) + ret i32 %0 +} + +declare i32 @llvm.riscv.swap16.i32(i32) + +define i32 @v_swap16(i32 %a.coerce) { +; CHECK-LABEL: v_swap16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: swap16 a0, a0 +; CHECK-NEXT: ret 
+entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = tail call <2 x i16> @llvm.riscv.swap16.v2i16(<2 x i16> %0) + %2 = bitcast <2 x i16> %1 to i32 + ret i32 %2 +} + +declare <2 x i16> @llvm.riscv.swap16.v2i16(<2 x i16>) + +define i32 @uclip8(i32 %a) { +; CHECK-LABEL: uclip8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: uclip8 a0, a0, 5 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.uclip8.i32.i32(i32 %a, i32 5) + ret i32 %0 +} + +declare i32 @llvm.riscv.uclip8.i32.i32(i32, i32) + +define i32 @v_uclip8(i32 %a.coerce) { +; CHECK-LABEL: v_uclip8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: uclip8 a0, a0, 5 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = tail call <4 x i8> @llvm.riscv.uclip8.v4i8.i32(<4 x i8> %0, i32 5) + %2 = bitcast <4 x i8> %1 to i32 + ret i32 %2 +} + +declare <4 x i8> @llvm.riscv.uclip8.v4i8.i32(<4 x i8>, i32) + +define i32 @uclip16(i32 %a) { +; CHECK-LABEL: uclip16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: uclip16 a0, a0, 6 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.uclip16.i32.i32(i32 %a, i32 6) + ret i32 %0 +} + +declare i32 @llvm.riscv.uclip16.i32.i32(i32, i32) + +define i32 @v_uclip16(i32 %a.coerce) { +; CHECK-LABEL: v_uclip16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: uclip16 a0, a0, 6 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = tail call <2 x i16> @llvm.riscv.uclip16.v2i16.i32(<2 x i16> %0, i32 6) + %2 = bitcast <2 x i16> %1 to i32 + ret i32 %2 +} + +declare <2 x i16> @llvm.riscv.uclip16.v2i16.i32(<2 x i16>, i32) + +define i32 @uclip32(i32 %a) { +; CHECK-LABEL: uclip32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: uclip32 a0, a0, 7 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.uclip32.i32.i32(i32 %a, i32 7) + ret i32 %0 +} + +declare i32 @llvm.riscv.uclip32.i32.i32(i32, i32) + +define i32 @ucmple8(i32 %a, i32 %b) { +; CHECK-LABEL: ucmple8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ucmple8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.ucmple8.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.ucmple8.i32(i32, i32) + +define i32 @v_ucmple8(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_ucmple8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ucmple8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = bitcast i32 %b.coerce to <4 x i8> + %2 = tail call <4 x i8> @llvm.riscv.ucmple8.v4i8(<4 x i8> %0, <4 x i8> %1) + %3 = bitcast <4 x i8> %2 to i32 + ret i32 %3 +} + +declare <4 x i8> @llvm.riscv.ucmple8.v4i8(<4 x i8>, <4 x i8>) + +define i32 @ucmple16(i32 %a, i32 %b) { +; CHECK-LABEL: ucmple16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ucmple16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.ucmple16.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.ucmple16.i32(i32, i32) + +define i32 @v_ucmple16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_ucmple16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ucmple16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.ucmple16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +declare <2 x i16> @llvm.riscv.ucmple16.v2i16(<2 x i16>, <2 x i16>) + +define i32 @ucmplt8(i32 %a, i32 %b) { +; CHECK-LABEL: ucmplt8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ucmplt8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.ucmplt8.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare 
i32 @llvm.riscv.ucmplt8.i32(i32, i32) + +define i32 @v_ucmplt8(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_ucmplt8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ucmplt8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = bitcast i32 %b.coerce to <4 x i8> + %2 = tail call <4 x i8> @llvm.riscv.ucmplt8.v4i8(<4 x i8> %0, <4 x i8> %1) + %3 = bitcast <4 x i8> %2 to i32 + ret i32 %3 +} + +declare <4 x i8> @llvm.riscv.ucmplt8.v4i8(<4 x i8>, <4 x i8>) + +define i32 @ucmplt16(i32 %a, i32 %b) { +; CHECK-LABEL: ucmplt16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ucmplt16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.ucmplt16.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.ucmplt16.i32(i32, i32) + +define i32 @v_ucmplt16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_ucmplt16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ucmplt16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.ucmplt16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +declare <2 x i16> @llvm.riscv.ucmplt16.v2i16(<2 x i16>, <2 x i16>) + +define i32 @ukadd8(i32 %a, i32 %b) { +; CHECK-LABEL: ukadd8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ukadd8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.ukadd8.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.ukadd8.i32(i32, i32) + +define i32 @v_ukadd8(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_ukadd8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ukadd8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = bitcast i32 %b.coerce to <4 x i8> + %2 = tail call <4 x i8> @llvm.riscv.ukadd8.v4i8(<4 x i8> %0, <4 x i8> %1) + %3 = bitcast <4 x i8> %2 to i32 + ret i32 %3 +} + +declare <4 x i8> @llvm.riscv.ukadd8.v4i8(<4 x i8>, <4 x i8>) + +define i32 @ukadd16(i32 %a, i32 %b) { +; CHECK-LABEL: ukadd16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ukadd16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.ukadd16.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.ukadd16.i32(i32, i32) + +define i32 @v_ukadd16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_ukadd16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ukadd16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.ukadd16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +declare <2 x i16> @llvm.riscv.ukadd16.v2i16(<2 x i16>, <2 x i16>) + +define i32 @ukaddh(i32 %a, i32 %b) { +; CHECK-LABEL: ukaddh: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ukaddh a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.ukaddh.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.ukaddh.i32(i32, i32) + +define i32 @ukaddw(i32 %a, i32 %b) { +; CHECK-LABEL: ukaddw: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ukaddw a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.ukaddw.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.ukaddw.i32(i32, i32) + +define i32 @ukcras16(i32 %a, i32 %b) { +; CHECK-LABEL: ukcras16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ukcras16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.ukcras16.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.ukcras16.i32(i32, i32) + +define i32 
@v_ukcras16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_ukcras16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ukcras16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.ukcras16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +declare <2 x i16> @llvm.riscv.ukcras16.v2i16(<2 x i16>, <2 x i16>) + +define i32 @ukcrsa16(i32 %a, i32 %b) { +; CHECK-LABEL: ukcrsa16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ukcrsa16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.ukcrsa16.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.ukcrsa16.i32(i32, i32) + +define i32 @v_ukcrsa16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_ukcrsa16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ukcrsa16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.ukcrsa16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +declare <2 x i16> @llvm.riscv.ukcrsa16.v2i16(<2 x i16>, <2 x i16>) + +define i32 @ukstas16(i32 %a, i32 %b) { +; CHECK-LABEL: ukstas16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ukstas16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.ukstas16.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.ukstas16.i32(i32, i32) + +define i32 @v_ukstas16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_ukstas16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ukstas16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.ukstas16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +declare <2 x i16> @llvm.riscv.ukstas16.v2i16(<2 x i16>, <2 x i16>) + +define i32 @ukstsa16(i32 %a, i32 %b) { +; CHECK-LABEL: ukstsa16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ukstsa16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.ukstsa16.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.ukstsa16.i32(i32, i32) + +define i32 @v_ukstsa16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_ukstsa16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ukstsa16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.ukstsa16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +declare <2 x i16> @llvm.riscv.ukstsa16.v2i16(<2 x i16>, <2 x i16>) + +define i32 @uksub8(i32 %a, i32 %b) { +; CHECK-LABEL: uksub8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: uksub8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.uksub8.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.uksub8.i32(i32, i32) + +define i32 @v_uksub8(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_uksub8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: uksub8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = bitcast i32 %b.coerce to <4 x i8> + %2 = tail call <4 x i8> @llvm.riscv.uksub8.v4i8(<4 x i8> %0, <4 x i8> %1) + %3 = bitcast <4 x i8> %2 to i32 + ret i32 %3 +} + +declare <4 x i8> @llvm.riscv.uksub8.v4i8(<4 x i8>, <4 x i8>) + +define i32 @uksub16(i32 %a, i32 %b) { +; CHECK-LABEL: uksub16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: uksub16 a0, a0, a1 +; 
CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.riscv.uksub16.i32(i32 %a, i32 %b)
+ ret i32 %0
+}
+
+declare i32 @llvm.riscv.uksub16.i32(i32, i32)
+
+define i32 @v_uksub16(i32 %a.coerce, i32 %b.coerce) {
+; CHECK-LABEL: v_uksub16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: uksub16 a0, a0, a1
+; CHECK-NEXT: ret
+entry:
+ %0 = bitcast i32 %a.coerce to <2 x i16>
+ %1 = bitcast i32 %b.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.riscv.uksub16.v2i16(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ ret i32 %3
+}
+
+declare <2 x i16> @llvm.riscv.uksub16.v2i16(<2 x i16>, <2 x i16>)
+
+define i32 @uksubh(i32 %a, i32 %b) {
+; CHECK-LABEL: uksubh:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: uksubh a0, a0, a1
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.riscv.uksubh.i32(i32 %a, i32 %b)
+ ret i32 %0
+}
+
+declare i32 @llvm.riscv.uksubh.i32(i32, i32)
+
+define i32 @uksubw(i32 %a, i32 %b) {
+; CHECK-LABEL: uksubw:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: uksubw a0, a0, a1
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.riscv.uksubw.i32(i32 %a, i32 %b)
+ ret i32 %0
+}
+
+declare i32 @llvm.riscv.uksubw.i32(i32, i32)
+
+define i32 @umaqa(i32 %t, i32 %a, i32 %b) {
+; CHECK-LABEL: umaqa:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: umaqa a0, a1, a2
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.riscv.umaqa.i32.i32(i32 %t, i32 %a, i32 %b)
+ ret i32 %0
+}
+
+declare i32 @llvm.riscv.umaqa.i32.i32(i32, i32, i32)
+
+define i32 @v_umaqa(i32 %t, i32 %a.coerce, i32 %b.coerce) {
+; CHECK-LABEL: v_umaqa:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: umaqa a0, a1, a2
+; CHECK-NEXT: ret
+entry:
+ %0 = bitcast i32 %a.coerce to <4 x i8>
+ %1 = bitcast i32 %b.coerce to <4 x i8>
+ %2 = tail call i32 @llvm.riscv.umaqa.i32.v4i8(i32 %t, <4 x i8> %0, <4 x i8> %1)
+ ret i32 %2
+}
+
+declare i32 @llvm.riscv.umaqa.i32.v4i8(i32, <4 x i8>, <4 x i8>)
+
+define i32 @umax8(i32 %a, i32 %b) {
+; CHECK-LABEL: umax8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: umax8 a0, a0, a1
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.riscv.umax8.i32(i32 %a, i32 %b)
+ ret i32 %0
+}
+
+declare i32 @llvm.riscv.umax8.i32(i32, i32)
+
+define i32 @v_umax8(i32 %a.coerce, i32 %b.coerce) {
+; CHECK-LABEL: v_umax8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: umax8 a0, a0, a1
+; CHECK-NEXT: ret
+entry:
+ %0 = bitcast i32 %a.coerce to <4 x i8>
+ %1 = bitcast i32 %b.coerce to <4 x i8>
+ %2 = tail call <4 x i8> @llvm.riscv.umax8.v4i8(<4 x i8> %0, <4 x i8> %1)
+ %3 = bitcast <4 x i8> %2 to i32
+ ret i32 %3
+}
+
+declare <4 x i8> @llvm.riscv.umax8.v4i8(<4 x i8>, <4 x i8>)
+
+define i32 @umax16(i32 %a, i32 %b) {
+; CHECK-LABEL: umax16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: umax16 a0, a0, a1
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.riscv.umax16.i32(i32 %a, i32 %b)
+ ret i32 %0
+}
+
+declare i32 @llvm.riscv.umax16.i32(i32, i32)
+
+define i32 @v_umax16(i32 %a.coerce, i32 %b.coerce) {
+; CHECK-LABEL: v_umax16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: umax16 a0, a0, a1
+; CHECK-NEXT: ret
+entry:
+ %0 = bitcast i32 %a.coerce to <2 x i16>
+ %1 = bitcast i32 %b.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.riscv.umax16.v2i16(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ ret i32 %3
+}
+
+declare <2 x i16> @llvm.riscv.umax16.v2i16(<2 x i16>, <2 x i16>)
+
+define i32 @umin8(i32 %a, i32 %b) {
+; CHECK-LABEL: umin8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: umin8 a0, a0, a1
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.riscv.umin8.i32(i32 %a, i32 %b)
+ ret i32 %0
+}
+
+declare i32 @llvm.riscv.umin8.i32(i32, i32)
+
+define i32 @v_umin8(i32 %a.coerce, i32 %b.coerce) {
+; CHECK-LABEL: v_umin8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: umin8 a0, a0, a1
+; CHECK-NEXT: ret
+entry:
+ %0 = bitcast i32 %a.coerce to <4 x i8>
+ %1 = bitcast i32 %b.coerce to <4 x i8>
+ %2 = tail call <4 x i8> @llvm.riscv.umin8.v4i8(<4 x i8> %0, <4 x i8> %1)
+ %3 = bitcast <4 x i8> %2 to i32
+ ret i32 %3
+}
+
+declare <4 x i8> @llvm.riscv.umin8.v4i8(<4 x i8>, <4 x i8>)
+
+define i32 @umin16(i32 %a, i32 %b) {
+; CHECK-LABEL: umin16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: umin16 a0, a0, a1
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.riscv.umin16.i32(i32 %a, i32 %b)
+ ret i32 %0
+}
+
+declare i32 @llvm.riscv.umin16.i32(i32, i32)
+
+define i32 @v_umin16(i32 %a.coerce, i32 %b.coerce) {
+; CHECK-LABEL: v_umin16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: umin16 a0, a0, a1
+; CHECK-NEXT: ret
+entry:
+ %0 = bitcast i32 %a.coerce to <2 x i16>
+ %1 = bitcast i32 %b.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.riscv.umin16.v2i16(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ ret i32 %3
+}
+
+declare <2 x i16> @llvm.riscv.umin16.v2i16(<2 x i16>, <2 x i16>)
+
+define i32 @uradd8(i32 %a, i32 %b) {
+; CHECK-LABEL: uradd8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: uradd8 a0, a0, a1
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.riscv.uradd8.i32(i32 %a, i32 %b)
+ ret i32 %0
+}
+
+declare i32 @llvm.riscv.uradd8.i32(i32, i32)
+
+define i32 @v_uradd8(i32 %a.coerce, i32 %b.coerce) {
+; CHECK-LABEL: v_uradd8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: uradd8 a0, a0, a1
+; CHECK-NEXT: ret
+entry:
+ %0 = bitcast i32 %a.coerce to <4 x i8>
+ %1 = bitcast i32 %b.coerce to <4 x i8>
+ %2 = tail call <4 x i8> @llvm.riscv.uradd8.v4i8(<4 x i8> %0, <4 x i8> %1)
+ %3 = bitcast <4 x i8> %2 to i32
+ ret i32 %3
+}
+
+declare <4 x i8> @llvm.riscv.uradd8.v4i8(<4 x i8>, <4 x i8>)
+
+define i32 @uradd16(i32 %a, i32 %b) {
+; CHECK-LABEL: uradd16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: uradd16 a0, a0, a1
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.riscv.uradd16.i32(i32 %a, i32 %b)
+ ret i32 %0
+}
+
+declare i32 @llvm.riscv.uradd16.i32(i32, i32)
+
+define i32 @v_uradd16(i32 %a.coerce, i32 %b.coerce) {
+; CHECK-LABEL: v_uradd16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: uradd16 a0, a0, a1
+; CHECK-NEXT: ret
+entry:
+ %0 = bitcast i32 %a.coerce to <2 x i16>
+ %1 = bitcast i32 %b.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.riscv.uradd16.v2i16(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ ret i32 %3
+}
+
+declare <2 x i16> @llvm.riscv.uradd16.v2i16(<2 x i16>, <2 x i16>)
+
+define i32 @uraddw(i32 %a, i32 %b) {
+; CHECK-LABEL: uraddw:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: uraddw a0, a0, a1
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.riscv.uraddw.i32(i32 %a, i32 %b)
+ ret i32 %0
+}
+
+declare i32 @llvm.riscv.uraddw.i32(i32, i32)
+
+define i32 @urcras16(i32 %a, i32 %b) {
+; CHECK-LABEL: urcras16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: urcras16 a0, a0, a1
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.riscv.urcras16.i32(i32 %a, i32 %b)
+ ret i32 %0
+}
+
+declare i32 @llvm.riscv.urcras16.i32(i32, i32)
+
+define i32 @v_urcras16(i32 %a.coerce, i32 %b.coerce) {
+; CHECK-LABEL: v_urcras16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: urcras16 a0, a0, a1
+; CHECK-NEXT: ret
+entry:
+ %0 = bitcast i32 %a.coerce to <2 x i16>
+ %1 = bitcast i32 %b.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.riscv.urcras16.v2i16(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ ret i32 %3
+}
+
+declare <2 x i16>
@llvm.riscv.urcras16.v2i16(<2 x i16>, <2 x i16>) + +define i32 @urcrsa16(i32 %a, i32 %b) { +; CHECK-LABEL: urcrsa16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: urcrsa16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.urcrsa16.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.urcrsa16.i32(i32, i32) + +define i32 @v_urcrsa16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_urcrsa16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: urcrsa16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.urcrsa16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +declare <2 x i16> @llvm.riscv.urcrsa16.v2i16(<2 x i16>, <2 x i16>) + +define i32 @urstas16(i32 %a, i32 %b) { +; CHECK-LABEL: urstas16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: urcras16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.urstas16.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.urstas16.i32(i32, i32) + +define i32 @v_urstas16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_urstas16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: urcras16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.urstas16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +declare <2 x i16> @llvm.riscv.urstas16.v2i16(<2 x i16>, <2 x i16>) + +define i32 @urstsa16(i32 %a, i32 %b) { +; CHECK-LABEL: urstsa16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: urcrsa16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.urstsa16.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.urstsa16.i32(i32, i32) + +define i32 @v_urstsa16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_urstsa16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: urcrsa16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = tail call <2 x i16> @llvm.riscv.urstsa16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +declare <2 x i16> @llvm.riscv.urstsa16.v2i16(<2 x i16>, <2 x i16>) + +define i32 @ursub8(i32 %a, i32 %b) { +; CHECK-LABEL: ursub8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ursub8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.ursub8.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.ursub8.i32(i32, i32) + +define i32 @v_ursub8(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_ursub8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ursub8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = bitcast i32 %b.coerce to <4 x i8> + %2 = tail call <4 x i8> @llvm.riscv.ursub8.v4i8(<4 x i8> %0, <4 x i8> %1) + %3 = bitcast <4 x i8> %2 to i32 + ret i32 %3 +} + +declare <4 x i8> @llvm.riscv.ursub8.v4i8(<4 x i8>, <4 x i8>) + +define i32 @ursub16(i32 %a, i32 %b) { +; CHECK-LABEL: ursub16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ursub16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.ursub16.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.ursub16.i32(i32, i32) + +define i32 @v_ursub16(i32 %a.coerce, i32 %b.coerce) { +; CHECK-LABEL: v_ursub16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ursub16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + 
%2 = tail call <2 x i16> @llvm.riscv.ursub16.v2i16(<2 x i16> %0, <2 x i16> %1) + %3 = bitcast <2 x i16> %2 to i32 + ret i32 %3 +} + +declare <2 x i16> @llvm.riscv.ursub16.v2i16(<2 x i16>, <2 x i16>) + +define i32 @ursubw(i32 %a, i32 %b) { +; CHECK-LABEL: ursubw: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ursubw a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.ursubw.i32(i32 %a, i32 %b) + ret i32 %0 +} + +declare i32 @llvm.riscv.ursubw.i32(i32, i32) + +define i32 @zunpkd810(i32 %a) { +; CHECK-LABEL: zunpkd810: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: zunpkd810 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.zunpkd810.i32(i32 %a) + ret i32 %0 +} + +declare i32 @llvm.riscv.zunpkd810.i32(i32) + +define i32 @v_zunpkd810(i32 %a.coerce) { +; CHECK-LABEL: v_zunpkd810: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: zunpkd810 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = tail call <2 x i16> @llvm.riscv.v.zunpkd810.v2i16(<4 x i8> %0) + %2 = bitcast <2 x i16> %1 to i32 + ret i32 %2 +} + +declare <2 x i16> @llvm.riscv.v.zunpkd810.v2i16(<4 x i8>) + +define i32 @zunpkd820(i32 %a) { +; CHECK-LABEL: zunpkd820: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: zunpkd820 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.zunpkd820.i32(i32 %a) + ret i32 %0 +} + +declare i32 @llvm.riscv.zunpkd820.i32(i32) + +define i32 @v_zunpkd820(i32 %a.coerce) { +; CHECK-LABEL: v_zunpkd820: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: zunpkd820 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = tail call <2 x i16> @llvm.riscv.v.zunpkd820.v2i16(<4 x i8> %0) + %2 = bitcast <2 x i16> %1 to i32 + ret i32 %2 +} + +declare <2 x i16> @llvm.riscv.v.zunpkd820.v2i16(<4 x i8>) + +define i32 @zunpkd830(i32 %a) { +; CHECK-LABEL: zunpkd830: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: zunpkd830 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.zunpkd830.i32(i32 %a) + ret i32 %0 +} + +declare i32 @llvm.riscv.zunpkd830.i32(i32) + +define i32 @v_zunpkd830(i32 %a.coerce) { +; CHECK-LABEL: v_zunpkd830: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: zunpkd830 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = tail call <2 x i16> @llvm.riscv.v.zunpkd830.v2i16(<4 x i8> %0) + %2 = bitcast <2 x i16> %1 to i32 + ret i32 %2 +} + +declare <2 x i16> @llvm.riscv.v.zunpkd830.v2i16(<4 x i8>) + +define i32 @zunpkd831(i32 %a) { +; CHECK-LABEL: zunpkd831: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: zunpkd831 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.zunpkd831.i32(i32 %a) + ret i32 %0 +} + +declare i32 @llvm.riscv.zunpkd831.i32(i32) + +define i32 @v_zunpkd831(i32 %a.coerce) { +; CHECK-LABEL: v_zunpkd831: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: zunpkd831 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = tail call <2 x i16> @llvm.riscv.v.zunpkd831.v2i16(<4 x i8> %0) + %2 = bitcast <2 x i16> %1 to i32 + ret i32 %2 +} + +declare <2 x i16> @llvm.riscv.v.zunpkd831.v2i16(<4 x i8>) + +define i32 @zunpkd832(i32 %a) { +; CHECK-LABEL: zunpkd832: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: zunpkd832 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.riscv.zunpkd832.i32(i32 %a) + ret i32 %0 +} + +declare i32 @llvm.riscv.zunpkd832.i32(i32) + +define i32 @v_zunpkd832(i32 %a.coerce) { +; CHECK-LABEL: v_zunpkd832: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: zunpkd832 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = bitcast i32 %a.coerce to <4 x i8> + %1 = tail call 
<2 x i16> @llvm.riscv.v.zunpkd832.v2i16(<4 x i8> %0)
+  %2 = bitcast <2 x i16> %1 to i32
+  ret i32 %2
+}
+
+declare <2 x i16> @llvm.riscv.v.zunpkd832.v2i16(<4 x i8>)
diff --git a/llvm/test/CodeGen/RISCV/rvp/intrinsics-rv64p.ll b/llvm/test/CodeGen/RISCV/rvp/intrinsics-rv64p.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvp/intrinsics-rv64p.ll
@@ -0,0 +1,5069 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-p -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s
+
+define i64 @add8(i64 %a, i64 %b) {
+; CHECK-LABEL: add8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    add8 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.riscv.add8.i64(i64 %a, i64 %b)
+  ret i64 %0
+}
+
+declare i64 @llvm.riscv.add8.i64(i64, i64)
+
+define i64 @v_uadd8(i64 %a.coerce, i64 %b.coerce) {
+; CHECK-LABEL: v_uadd8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    add8 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = bitcast i64 %a.coerce to <8 x i8>
+  %1 = bitcast i64 %b.coerce to <8 x i8>
+  %2 = tail call <8 x i8> @llvm.riscv.add8.v8i8(<8 x i8> %0, <8 x i8> %1)
+  %3 = bitcast <8 x i8> %2 to i64
+  ret i64 %3
+}
+
+declare <8 x i8> @llvm.riscv.add8.v8i8(<8 x i8>, <8 x i8>)
+
+define i64 @v_sadd8(i64 %a.coerce, i64 %b.coerce) {
+; CHECK-LABEL: v_sadd8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    add8 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = bitcast i64 %a.coerce to <8 x i8>
+  %1 = bitcast i64 %b.coerce to <8 x i8>
+  %2 = tail call <8 x i8> @llvm.riscv.add8.v8i8(<8 x i8> %0, <8 x i8> %1)
+  %3 = bitcast <8 x i8> %2 to i64
+  ret i64 %3
+}
+
+define i64 @add16(i64 %a, i64 %b) {
+; CHECK-LABEL: add16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    add16 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.riscv.add16.i64(i64 %a, i64 %b)
+  ret i64 %0
+}
+
+declare i64 @llvm.riscv.add16.i64(i64, i64)
+
+define i64 @uadd16(i64 %a.coerce, i64 %b.coerce) {
+; CHECK-LABEL: uadd16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    add16 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = bitcast i64 %a.coerce to <4 x i16>
+  %1 = bitcast i64 %b.coerce to <4 x i16>
+  %2 = tail call <4 x i16> @llvm.riscv.add16.v4i16(<4 x i16> %0, <4 x i16> %1)
+  %3 = bitcast <4 x i16> %2 to i64
+  ret i64 %3
+}
+
+declare <4 x i16> @llvm.riscv.add16.v4i16(<4 x i16>, <4 x i16>)
+
+define i64 @sadd16(i64 %a.coerce, i64 %b.coerce) {
+; CHECK-LABEL: sadd16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    add16 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = bitcast i64 %a.coerce to <4 x i16>
+  %1 = bitcast i64 %b.coerce to <4 x i16>
+  %2 = tail call <4 x i16> @llvm.riscv.add16.v4i16(<4 x i16> %0, <4 x i16> %1)
+  %3 = bitcast <4 x i16> %2 to i64
+  ret i64 %3
+}
+
+define i64 @ave(i64 %a, i64 %b) {
+; CHECK-LABEL: ave:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ave a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.riscv.ave.i64(i64 %a, i64 %b)
+  ret i64 %0
+}
+
+declare i64 @llvm.riscv.ave.i64(i64, i64)
+
+define i64 @bitrev(i64 %a, i64 %b) {
+; CHECK-LABEL: bitrev:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    bitrev a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.riscv.bitrev.i64(i64 %a, i64 %b)
+  ret i64 %0
+}
+
+declare i64 @llvm.riscv.bitrev.i64(i64, i64)
+
+define i64 @bpick(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: bpick:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    bpick a0, a0, a1, a2
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.riscv.bpick.i64(i64 %a, i64 %b, i64 %c)
+  ret i64 %0
+}
+
+declare i64
@llvm.riscv.bpick.i64(i64, i64, i64) + +define i64 @clrs8(i64 %a) { +; CHECK-LABEL: clrs8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clrs8 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.clrs8.i64(i64 %a) + ret i64 %0 +} + +declare i64 @llvm.riscv.clrs8.i64(i64) + +define i64 @v_clrs8(i64 %a.coerce) { +; CHECK-LABEL: v_clrs8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clrs8 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <8 x i8> + %1 = tail call <8 x i8> @llvm.riscv.clrs8.v8i8(<8 x i8> %0) + %2 = bitcast <8 x i8> %1 to i64 + ret i64 %2 +} + +declare <8 x i8> @llvm.riscv.clrs8.v8i8(<8 x i8>) + +define i64 @clrs16(i64 %a) { +; CHECK-LABEL: clrs16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clrs16 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.clrs16.i64(i64 %a) + ret i64 %0 +} + +declare i64 @llvm.riscv.clrs16.i64(i64) + +define i64 @v_clrs16(i64 %a.coerce) { +; CHECK-LABEL: v_clrs16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clrs16 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = tail call <4 x i16> @llvm.riscv.clrs16.v4i16(<4 x i16> %0) + %2 = bitcast <4 x i16> %1 to i64 + ret i64 %2 +} + +declare <4 x i16> @llvm.riscv.clrs16.v4i16(<4 x i16>) + +define i64 @clrs32(i64 %a) { +; CHECK-LABEL: clrs32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clrs32 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.clrs32.i64(i64 %a) + ret i64 %0 +} + +declare i64 @llvm.riscv.clrs32.i64(i64) + +define i64 @v_clrs32(i64 %a.coerce) { +; CHECK-LABEL: v_clrs32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clrs32 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <2 x i32> + %1 = tail call <2 x i32> @llvm.riscv.clrs32.v2i32(<2 x i32> %0) + %2 = bitcast <2 x i32> %1 to i64 + ret i64 %2 +} + +declare <2 x i32> @llvm.riscv.clrs32.v2i32(<2 x i32>) + +define i64 @clo8(i64 %a) { +; CHECK-LABEL: clo8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clo8 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.clo8.i64(i64 %a) + ret i64 %0 +} + +declare i64 @llvm.riscv.clo8.i64(i64) + +define i64 @v_clo8(i64 %a.coerce) { +; CHECK-LABEL: v_clo8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clo8 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <8 x i8> + %1 = tail call <8 x i8> @llvm.riscv.clo8.v8i8(<8 x i8> %0) + %2 = bitcast <8 x i8> %1 to i64 + ret i64 %2 +} + +declare <8 x i8> @llvm.riscv.clo8.v8i8(<8 x i8>) + +define i64 @clo16(i64 %a) { +; CHECK-LABEL: clo16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clo16 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.clo16.i64(i64 %a) + ret i64 %0 +} + +declare i64 @llvm.riscv.clo16.i64(i64) + +define i64 @v_clo16(i64 %a.coerce) { +; CHECK-LABEL: v_clo16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clo16 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = tail call <4 x i16> @llvm.riscv.clo16.v4i16(<4 x i16> %0) + %2 = bitcast <4 x i16> %1 to i64 + ret i64 %2 +} + +declare <4 x i16> @llvm.riscv.clo16.v4i16(<4 x i16>) + +define i64 @clo32(i64 %a) { +; CHECK-LABEL: clo32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clo32 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.clo32.i64(i64 %a) + ret i64 %0 +} + +declare i64 @llvm.riscv.clo32.i64(i64) + +define i64 @v_clo32(i64 %a.coerce) { +; CHECK-LABEL: v_clo32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clo32 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <2 x i32> + %1 = tail call <2 x i32> 
@llvm.riscv.clo32.v2i32(<2 x i32> %0) + %2 = bitcast <2 x i32> %1 to i64 + ret i64 %2 +} + +declare <2 x i32> @llvm.riscv.clo32.v2i32(<2 x i32>) + +define i64 @clz8(i64 %a) { +; CHECK-LABEL: clz8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clz8 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.clz8.i64(i64 %a) + ret i64 %0 +} + +declare i64 @llvm.riscv.clz8.i64(i64) + +define i64 @v_clz8(i64 %a.coerce) { +; CHECK-LABEL: v_clz8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clz8 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <8 x i8> + %1 = tail call <8 x i8> @llvm.riscv.clz8.v8i8(<8 x i8> %0) + %2 = bitcast <8 x i8> %1 to i64 + ret i64 %2 +} + +declare <8 x i8> @llvm.riscv.clz8.v8i8(<8 x i8>) + +define i64 @clz16(i64 %a) { +; CHECK-LABEL: clz16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clz16 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.clz16.i64(i64 %a) + ret i64 %0 +} + +declare i64 @llvm.riscv.clz16.i64(i64) + +define i64 @v_clz16(i64 %a.coerce) { +; CHECK-LABEL: v_clz16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clz16 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = tail call <4 x i16> @llvm.riscv.clz16.v4i16(<4 x i16> %0) + %2 = bitcast <4 x i16> %1 to i64 + ret i64 %2 +} + +declare <4 x i16> @llvm.riscv.clz16.v4i16(<4 x i16>) + +define i64 @clz32(i64 %a) { +; CHECK-LABEL: clz32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clz32 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.clz32.i64(i64 %a) + ret i64 %0 +} + +declare i64 @llvm.riscv.clz32.i64(i64) + +define i64 @v_clz32(i64 %a.coerce) { +; CHECK-LABEL: v_clz32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clz32 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <2 x i32> + %1 = tail call <2 x i32> @llvm.riscv.clz32.v2i32(<2 x i32> %0) + %2 = bitcast <2 x i32> %1 to i64 + ret i64 %2 +} + +declare <2 x i32> @llvm.riscv.clz32.v2i32(<2 x i32>) + +define i64 @cmpeq8(i64 %a, i64 %b) { +; CHECK-LABEL: cmpeq8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpeq8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.cmpeq8.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.cmpeq8.i64(i64, i64) + +define i64 @v_scmpeq8(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_scmpeq8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpeq8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <8 x i8> + %1 = bitcast i64 %b.coerce to <8 x i8> + %2 = tail call <8 x i8> @llvm.riscv.cmpeq8.v8i8(<8 x i8> %0, <8 x i8> %1) + %3 = bitcast <8 x i8> %2 to i64 + ret i64 %3 +} + +declare <8 x i8> @llvm.riscv.cmpeq8.v8i8(<8 x i8>, <8 x i8>) + +define i64 @v_ucmpeq8(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_ucmpeq8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpeq8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <8 x i8> + %1 = bitcast i64 %b.coerce to <8 x i8> + %2 = tail call <8 x i8> @llvm.riscv.cmpeq8.v8i8(<8 x i8> %0, <8 x i8> %1) + %3 = bitcast <8 x i8> %2 to i64 + ret i64 %3 +} + +define i64 @cmpeq16(i64 %a, i64 %b) { +; CHECK-LABEL: cmpeq16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpeq16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.cmpeq16.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.cmpeq16.i64(i64, i64) + +define i64 @v_scmpeq16(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_scmpeq16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpeq16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 
= bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <4 x i16> @llvm.riscv.cmpeq16.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <4 x i16> %2 to i64 + ret i64 %3 +} + +declare <4 x i16> @llvm.riscv.cmpeq16.v4i16(<4 x i16>, <4 x i16>) + +define i64 @v_ucmpeq16(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_ucmpeq16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpeq16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <4 x i16> @llvm.riscv.cmpeq16.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <4 x i16> %2 to i64 + ret i64 %3 +} + +define i64 @cras16(i64 %a, i64 %b) { +; CHECK-LABEL: cras16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cras16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.cras16.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.cras16.i64(i64, i64) + +define i64 @v_ucras16(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_ucras16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cras16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <4 x i16> @llvm.riscv.cras16.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <4 x i16> %2 to i64 + ret i64 %3 +} + +declare <4 x i16> @llvm.riscv.cras16.v4i16(<4 x i16>, <4 x i16>) + +define i64 @v_scras16(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_scras16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cras16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <4 x i16> @llvm.riscv.cras16.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <4 x i16> %2 to i64 + ret i64 %3 +} + +define i64 @crsa16(i64 %a, i64 %b) { +; CHECK-LABEL: crsa16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: crsa16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.crsa16.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.crsa16.i64(i64, i64) + +define i64 @v_ucrsa16(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_ucrsa16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: crsa16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <4 x i16> @llvm.riscv.crsa16.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <4 x i16> %2 to i64 + ret i64 %3 +} + +declare <4 x i16> @llvm.riscv.crsa16.v4i16(<4 x i16>, <4 x i16>) + +define i64 @v_scrsa16(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_scrsa16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: crsa16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <4 x i16> @llvm.riscv.crsa16.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <4 x i16> %2 to i64 + ret i64 %3 +} + +define i64 @insb(i64 %a, i64 %b) { +; CHECK-LABEL: insb: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: insb a0, a1, 5 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.insb.i64(i64 %a, i64 %b, i64 5) + ret i64 %0 +} + +declare i64 @llvm.riscv.insb.i64(i64, i64, i64) + +define i64 @kabs8(i64 %a) { +; CHECK-LABEL: kabs8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kabs8 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.kabs8.i64(i64 %a) + ret i64 %0 +} + +declare i64 @llvm.riscv.kabs8.i64(i64) + +define i64 @v_kabs8(i64 %a.coerce) { +; CHECK-LABEL: v_kabs8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kabs8 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 
%a.coerce to <8 x i8> + %1 = tail call <8 x i8> @llvm.riscv.kabs8.v8i8(<8 x i8> %0) + %2 = bitcast <8 x i8> %1 to i64 + ret i64 %2 +} + +declare <8 x i8> @llvm.riscv.kabs8.v8i8(<8 x i8>) + +define i64 @kabs16(i64 %a) { +; CHECK-LABEL: kabs16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kabs16 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.kabs16.i64(i64 %a) + ret i64 %0 +} + +declare i64 @llvm.riscv.kabs16.i64(i64) + +define i64 @v_kabs16(i64 %a.coerce) { +; CHECK-LABEL: v_kabs16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kabs16 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = tail call <4 x i16> @llvm.riscv.kabs16.v4i16(<4 x i16> %0) + %2 = bitcast <4 x i16> %1 to i64 + ret i64 %2 +} + +declare <4 x i16> @llvm.riscv.kabs16.v4i16(<4 x i16>) + +define i64 @kabsw(i64 %a) { +; CHECK-LABEL: kabsw: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kabsw a0, a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.kabsw.i64(i64 %a) + ret i64 %0 +} + +declare i64 @llvm.riscv.kabsw.i64(i64) + +define i64 @kadd8(i64 %a, i64 %b) { +; CHECK-LABEL: kadd8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kadd8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.kadd8.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.kadd8.i64(i64, i64) + +define i64 @v_kadd8(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_kadd8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kadd8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <8 x i8> + %1 = bitcast i64 %b.coerce to <8 x i8> + %2 = tail call <8 x i8> @llvm.riscv.kadd8.v8i8(<8 x i8> %0, <8 x i8> %1) + %3 = bitcast <8 x i8> %2 to i64 + ret i64 %3 +} + +declare <8 x i8> @llvm.riscv.kadd8.v8i8(<8 x i8>, <8 x i8>) + +define i64 @kadd16(i64 %a, i64 %b) { +; CHECK-LABEL: kadd16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kadd16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.kadd16.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.kadd16.i64(i64, i64) + +define i64 @v_kadd16(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_kadd16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kadd16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <4 x i16> @llvm.riscv.kadd16.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <4 x i16> %2 to i64 + ret i64 %3 +} + +declare <4 x i16> @llvm.riscv.kadd16.v4i16(<4 x i16>, <4 x i16>) + +define i64 @kaddh(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: kaddh: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kaddh a0, a0, a1 +; CHECK-NEXT: ret +entry: + %conv = sext i32 %a to i64 + %conv1 = sext i32 %b to i64 + %0 = tail call i64 @llvm.riscv.kaddh.i64(i64 %conv, i64 %conv1) + ret i64 %0 +} + +declare i64 @llvm.riscv.kaddh.i64(i64, i64) + +define i64 @kaddw(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: kaddw: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kaddw a0, a0, a1 +; CHECK-NEXT: ret +entry: + %conv = sext i32 %a to i64 + %conv1 = sext i32 %b to i64 + %0 = tail call i64 @llvm.riscv.kaddw.i64(i64 %conv, i64 %conv1) + ret i64 %0 +} + +declare i64 @llvm.riscv.kaddw.i64(i64, i64) + +define i64 @kcras16(i64 %a, i64 %b) { +; CHECK-LABEL: kcras16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kcras16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.kcras16.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.kcras16.i64(i64, i64) + +define i64 @v_kcras16(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: 
v_kcras16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kcras16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <4 x i16> @llvm.riscv.kcras16.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <4 x i16> %2 to i64 + ret i64 %3 +} + +declare <4 x i16> @llvm.riscv.kcras16.v4i16(<4 x i16>, <4 x i16>) + +define i64 @kcrsa16(i64 %a, i64 %b) { +; CHECK-LABEL: kcrsa16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kcrsa16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.kcrsa16.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.kcrsa16.i64(i64, i64) + +define i64 @v_kcrsa16(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_kcrsa16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kcrsa16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <4 x i16> @llvm.riscv.kcrsa16.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <4 x i16> %2 to i64 + ret i64 %3 +} + +declare <4 x i16> @llvm.riscv.kcrsa16.v4i16(<4 x i16>, <4 x i16>) + +define i64 @kdmbb(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: kdmbb: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 +; CHECK-NEXT: slli a1, a1, 32 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: kdmbb a0, a0, a1 +; CHECK-NEXT: ret +entry: + %conv = zext i32 %a to i64 + %conv1 = zext i32 %b to i64 + %0 = tail call i64 @llvm.riscv.kdmbb.i64.i64(i64 %conv, i64 %conv1) + ret i64 %0 +} + +declare i64 @llvm.riscv.kdmbb.i64.i64(i64, i64) + +define i64 @v_kdmbb(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_kdmbb: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kdmbb a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call i64 @llvm.riscv.kdmbb.i64.v4i16(<4 x i16> %0, <4 x i16> %1) + ret i64 %2 +} + +declare i64 @llvm.riscv.kdmbb.i64.v4i16(<4 x i16>, <4 x i16>) + +define i64 @kdmbt(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: kdmbt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 +; CHECK-NEXT: slli a1, a1, 32 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: kdmbt a0, a0, a1 +; CHECK-NEXT: ret +entry: + %conv = zext i32 %a to i64 + %conv1 = zext i32 %b to i64 + %0 = tail call i64 @llvm.riscv.kdmbt.i64.i64(i64 %conv, i64 %conv1) + ret i64 %0 +} + +declare i64 @llvm.riscv.kdmbt.i64.i64(i64, i64) + +define i64 @v_kdmbt(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_kdmbt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kdmbt a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call i64 @llvm.riscv.kdmbt.i64.v4i16(<4 x i16> %0, <4 x i16> %1) + ret i64 %2 +} + +declare i64 @llvm.riscv.kdmbt.i64.v4i16(<4 x i16>, <4 x i16>) + +define i64 @kdmtt(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: kdmtt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 +; CHECK-NEXT: slli a1, a1, 32 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: kdmtt a0, a0, a1 +; CHECK-NEXT: ret +entry: + %conv = zext i32 %a to i64 + %conv1 = zext i32 %b to i64 + %0 = tail call i64 @llvm.riscv.kdmtt.i64.i64(i64 %conv, i64 %conv1) + ret i64 %0 +} + +declare i64 @llvm.riscv.kdmtt.i64.i64(i64, i64) + +define i64 @v_kdmtt(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_kdmtt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kdmtt a0, a0, a1 +; CHECK-NEXT: 
ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call i64 @llvm.riscv.kdmtt.i64.v4i16(<4 x i16> %0, <4 x i16> %1) + ret i64 %2 +} + +declare i64 @llvm.riscv.kdmtt.i64.v4i16(<4 x i16>, <4 x i16>) + +define i64 @kdmabb(i64 %t, i32 signext %a, i32 signext %b) { +; CHECK-LABEL: kdmabb: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a1, a1, 32 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: slli a2, a2, 32 +; CHECK-NEXT: srli a2, a2, 32 +; CHECK-NEXT: kdmabb a0, a1, a2 +; CHECK-NEXT: ret +entry: + %conv = zext i32 %a to i64 + %conv1 = zext i32 %b to i64 + %0 = tail call i64 @llvm.riscv.kdmabb.i64.i64(i64 %t, i64 %conv, i64 %conv1) + ret i64 %0 +} + +declare i64 @llvm.riscv.kdmabb.i64.i64(i64, i64, i64) + +define i64 @v_kdmabb(i64 %t, i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_kdmabb: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kdmabb a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call i64 @llvm.riscv.kdmabb.i64.v4i16(i64 %t, <4 x i16> %0, <4 x i16> %1) + ret i64 %2 +} + +declare i64 @llvm.riscv.kdmabb.i64.v4i16(i64, <4 x i16>, <4 x i16>) + +define i64 @kdmabt(i64 %t, i32 signext %a, i32 signext %b) { +; CHECK-LABEL: kdmabt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a1, a1, 32 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: slli a2, a2, 32 +; CHECK-NEXT: srli a2, a2, 32 +; CHECK-NEXT: kdmabt a0, a1, a2 +; CHECK-NEXT: ret +entry: + %conv = zext i32 %a to i64 + %conv1 = zext i32 %b to i64 + %0 = tail call i64 @llvm.riscv.kdmabt.i64.i64(i64 %t, i64 %conv, i64 %conv1) + ret i64 %0 +} + +declare i64 @llvm.riscv.kdmabt.i64.i64(i64, i64, i64) + +define i64 @v_kdmabt(i64 %t, i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_kdmabt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kdmabt a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call i64 @llvm.riscv.kdmabt.i64.v4i16(i64 %t, <4 x i16> %0, <4 x i16> %1) + ret i64 %2 +} + +declare i64 @llvm.riscv.kdmabt.i64.v4i16(i64, <4 x i16>, <4 x i16>) + +define i64 @kdmatt(i64 %t, i32 signext %a, i32 signext %b) { +; CHECK-LABEL: kdmatt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a1, a1, 32 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: slli a2, a2, 32 +; CHECK-NEXT: srli a2, a2, 32 +; CHECK-NEXT: kdmatt a0, a1, a2 +; CHECK-NEXT: ret +entry: + %conv = zext i32 %a to i64 + %conv1 = zext i32 %b to i64 + %0 = tail call i64 @llvm.riscv.kdmatt.i64.i64(i64 %t, i64 %conv, i64 %conv1) + ret i64 %0 +} + +declare i64 @llvm.riscv.kdmatt.i64.i64(i64, i64, i64) + +define i64 @v_kdmatt(i64 %t, i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_kdmatt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kdmatt a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call i64 @llvm.riscv.kdmatt.i64.v4i16(i64 %t, <4 x i16> %0, <4 x i16> %1) + ret i64 %2 +} + +declare i64 @llvm.riscv.kdmatt.i64.v4i16(i64, <4 x i16>, <4 x i16>) + +define i64 @khm8(i64 %a, i64 %b) { +; CHECK-LABEL: khm8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: khm8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.khm8.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.khm8.i64(i64, i64) + +define i64 @v_khm8(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_khm8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: khm8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to 
<8 x i8> + %1 = bitcast i64 %b.coerce to <8 x i8> + %2 = tail call <8 x i8> @llvm.riscv.khm8.v8i8(<8 x i8> %0, <8 x i8> %1) + %3 = bitcast <8 x i8> %2 to i64 + ret i64 %3 +} + +declare <8 x i8> @llvm.riscv.khm8.v8i8(<8 x i8>, <8 x i8>) + +define i64 @khmx8(i64 %a, i64 %b) { +; CHECK-LABEL: khmx8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: khmx8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.khmx8.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.khmx8.i64(i64, i64) + +define i64 @v_khmx8(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_khmx8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: khmx8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <8 x i8> + %1 = bitcast i64 %b.coerce to <8 x i8> + %2 = tail call <8 x i8> @llvm.riscv.khmx8.v8i8(<8 x i8> %0, <8 x i8> %1) + %3 = bitcast <8 x i8> %2 to i64 + ret i64 %3 +} + +declare <8 x i8> @llvm.riscv.khmx8.v8i8(<8 x i8>, <8 x i8>) + +define i64 @khm16(i64 %a, i64 %b) { +; CHECK-LABEL: khm16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: khm16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.khm16.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.khm16.i64(i64, i64) + +define i64 @v_khm16(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_khm16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: khm16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <4 x i16> @llvm.riscv.khm16.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <4 x i16> %2 to i64 + ret i64 %3 +} + +declare <4 x i16> @llvm.riscv.khm16.v4i16(<4 x i16>, <4 x i16>) + +define i64 @khmx16(i64 %a, i64 %b) { +; CHECK-LABEL: khmx16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: khmx16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.khmx16.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.khmx16.i64(i64, i64) + +define i64 @v_khmx16(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_khmx16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: khmx16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <4 x i16> @llvm.riscv.khmx16.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <4 x i16> %2 to i64 + ret i64 %3 +} + +declare <4 x i16> @llvm.riscv.khmx16.v4i16(<4 x i16>, <4 x i16>) + +define i64 @khmbb(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: khmbb: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 +; CHECK-NEXT: slli a1, a1, 32 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: khmbb a0, a0, a1 +; CHECK-NEXT: ret +entry: + %conv = zext i32 %a to i64 + %conv1 = zext i32 %b to i64 + %0 = tail call i64 @llvm.riscv.khmbb.i64.i64(i64 %conv, i64 %conv1) + ret i64 %0 +} + +declare i64 @llvm.riscv.khmbb.i64.i64(i64, i64) + +define i64 @v_khmbb(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_khmbb: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: khmbb a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call i64 @llvm.riscv.khmbb.i64.v4i16(<4 x i16> %0, <4 x i16> %1) + ret i64 %2 +} + +declare i64 @llvm.riscv.khmbb.i64.v4i16(<4 x i16>, <4 x i16>) + +define i64 @khmbt(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: khmbt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 +; CHECK-NEXT: slli a1, a1, 32 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: khmbt a0, a0, a1 +; 
CHECK-NEXT: ret +entry: + %conv = zext i32 %a to i64 + %conv1 = zext i32 %b to i64 + %0 = tail call i64 @llvm.riscv.khmbt.i64.i64(i64 %conv, i64 %conv1) + ret i64 %0 +} + +declare i64 @llvm.riscv.khmbt.i64.i64(i64, i64) + +define i64 @v_khmbt(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_khmbt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: khmbt a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call i64 @llvm.riscv.khmbt.i64.v4i16(<4 x i16> %0, <4 x i16> %1) + ret i64 %2 +} + +declare i64 @llvm.riscv.khmbt.i64.v4i16(<4 x i16>, <4 x i16>) + +define i64 @khmtt(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: khmtt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 +; CHECK-NEXT: slli a1, a1, 32 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: khmtt a0, a0, a1 +; CHECK-NEXT: ret +entry: + %conv = zext i32 %a to i64 + %conv1 = zext i32 %b to i64 + %0 = tail call i64 @llvm.riscv.khmtt.i64.i64(i64 %conv, i64 %conv1) + ret i64 %0 +} + +declare i64 @llvm.riscv.khmtt.i64.i64(i64, i64) + +define i64 @v_khmtt(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_khmtt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: khmtt a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call i64 @llvm.riscv.khmtt.i64.v4i16(<4 x i16> %0, <4 x i16> %1) + ret i64 %2 +} + +declare i64 @llvm.riscv.khmtt.i64.v4i16(<4 x i16>, <4 x i16>) + +define i64 @kmabb(i64 %t, i64 %a, i64 %b) { +; CHECK-LABEL: kmabb: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmabb a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.kmabb.i64.i64(i64 %t, i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.kmabb.i64.i64(i64, i64, i64) + +define i64 @v_kmabb(i64 %t.coerce, i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_kmabb: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmabb a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %t.coerce to <2 x i32> + %1 = bitcast i64 %a.coerce to <4 x i16> + %2 = bitcast i64 %b.coerce to <4 x i16> + %3 = tail call <2 x i32> @llvm.riscv.kmabb.v2i32.v4i16(<2 x i32> %0, <4 x i16> %1, <4 x i16> %2) + %4 = bitcast <2 x i32> %3 to i64 + ret i64 %4 +} + +declare <2 x i32> @llvm.riscv.kmabb.v2i32.v4i16(<2 x i32>, <4 x i16>, <4 x i16>) + +define i64 @kmabt(i64 %t, i64 %a, i64 %b) { +; CHECK-LABEL: kmabt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmabt a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.kmabt.i64.i64(i64 %t, i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.kmabt.i64.i64(i64, i64, i64) + +define i64 @v_kmabt(i64 %t.coerce, i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_kmabt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmabt a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %t.coerce to <2 x i32> + %1 = bitcast i64 %a.coerce to <4 x i16> + %2 = bitcast i64 %b.coerce to <4 x i16> + %3 = tail call <2 x i32> @llvm.riscv.kmabt.v2i32.v4i16(<2 x i32> %0, <4 x i16> %1, <4 x i16> %2) + %4 = bitcast <2 x i32> %3 to i64 + ret i64 %4 +} + +declare <2 x i32> @llvm.riscv.kmabt.v2i32.v4i16(<2 x i32>, <4 x i16>, <4 x i16>) + +define i64 @kmatt(i64 %t, i64 %a, i64 %b) { +; CHECK-LABEL: kmatt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmatt a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.kmatt.i64.i64(i64 %t, i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.kmatt.i64.i64(i64, i64, i64) + +define i64 @v_kmatt(i64 %t.coerce, i64 
%a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_kmatt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmatt a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %t.coerce to <2 x i32> + %1 = bitcast i64 %a.coerce to <4 x i16> + %2 = bitcast i64 %b.coerce to <4 x i16> + %3 = tail call <2 x i32> @llvm.riscv.kmatt.v2i32.v4i16(<2 x i32> %0, <4 x i16> %1, <4 x i16> %2) + %4 = bitcast <2 x i32> %3 to i64 + ret i64 %4 +} + +declare <2 x i32> @llvm.riscv.kmatt.v2i32.v4i16(<2 x i32>, <4 x i16>, <4 x i16>) + +define i64 @kmada(i64 %t, i64 %a, i64 %b) { +; CHECK-LABEL: kmada: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmada a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.kmada.i64.i64(i64 %t, i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.kmada.i64.i64(i64, i64, i64) + +define i64 @v_kmada(i64 %t.coerce, i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_kmada: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmada a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %t.coerce to <2 x i32> + %1 = bitcast i64 %a.coerce to <4 x i16> + %2 = bitcast i64 %b.coerce to <4 x i16> + %3 = tail call <2 x i32> @llvm.riscv.kmada.v2i32.v4i16(<2 x i32> %0, <4 x i16> %1, <4 x i16> %2) + %4 = bitcast <2 x i32> %3 to i64 + ret i64 %4 +} + +declare <2 x i32> @llvm.riscv.kmada.v2i32.v4i16(<2 x i32>, <4 x i16>, <4 x i16>) + +define i64 @kmaxda(i64 %t, i64 %a, i64 %b) { +; CHECK-LABEL: kmaxda: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmaxda a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.kmaxda.i64.i64(i64 %t, i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.kmaxda.i64.i64(i64, i64, i64) + +define i64 @v_kmaxda(i64 %t.coerce, i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_kmaxda: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmaxda a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %t.coerce to <2 x i32> + %1 = bitcast i64 %a.coerce to <4 x i16> + %2 = bitcast i64 %b.coerce to <4 x i16> + %3 = tail call <2 x i32> @llvm.riscv.kmaxda.v2i32.v4i16(<2 x i32> %0, <4 x i16> %1, <4 x i16> %2) + %4 = bitcast <2 x i32> %3 to i64 + ret i64 %4 +} + +declare <2 x i32> @llvm.riscv.kmaxda.v2i32.v4i16(<2 x i32>, <4 x i16>, <4 x i16>) + +define i64 @kmads(i64 %t, i64 %a, i64 %b) { +; CHECK-LABEL: kmads: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmads a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.kmads.i64.i64(i64 %t, i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.kmads.i64.i64(i64, i64, i64) + +define i64 @v_kmads(i64 %t.coerce, i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_kmads: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmads a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %t.coerce to <2 x i32> + %1 = bitcast i64 %a.coerce to <4 x i16> + %2 = bitcast i64 %b.coerce to <4 x i16> + %3 = tail call <2 x i32> @llvm.riscv.kmads.v2i32.v4i16(<2 x i32> %0, <4 x i16> %1, <4 x i16> %2) + %4 = bitcast <2 x i32> %3 to i64 + ret i64 %4 +} + +declare <2 x i32> @llvm.riscv.kmads.v2i32.v4i16(<2 x i32>, <4 x i16>, <4 x i16>) + +define i64 @kmadrs(i64 %t, i64 %a, i64 %b) { +; CHECK-LABEL: kmadrs: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmadrs a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.kmadrs.i64.i64(i64 %t, i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.kmadrs.i64.i64(i64, i64, i64) + +define i64 @v_kmadrs(i64 %t.coerce, i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_kmadrs: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmadrs a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %t.coerce 
to <2 x i32> + %1 = bitcast i64 %a.coerce to <4 x i16> + %2 = bitcast i64 %b.coerce to <4 x i16> + %3 = tail call <2 x i32> @llvm.riscv.kmadrs.v2i32.v4i16(<2 x i32> %0, <4 x i16> %1, <4 x i16> %2) + %4 = bitcast <2 x i32> %3 to i64 + ret i64 %4 +} + +declare <2 x i32> @llvm.riscv.kmadrs.v2i32.v4i16(<2 x i32>, <4 x i16>, <4 x i16>) + +define i64 @kmaxds(i64 %t, i64 %a, i64 %b) { +; CHECK-LABEL: kmaxds: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmaxds a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.kmaxds.i64.i64(i64 %t, i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.kmaxds.i64.i64(i64, i64, i64) + +define i64 @v_kmaxds(i64 %t.coerce, i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_kmaxds: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmaxds a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %t.coerce to <2 x i32> + %1 = bitcast i64 %a.coerce to <4 x i16> + %2 = bitcast i64 %b.coerce to <4 x i16> + %3 = tail call <2 x i32> @llvm.riscv.kmaxds.v2i32.v4i16(<2 x i32> %0, <4 x i16> %1, <4 x i16> %2) + %4 = bitcast <2 x i32> %3 to i64 + ret i64 %4 +} + +declare <2 x i32> @llvm.riscv.kmaxds.v2i32.v4i16(<2 x i32>, <4 x i16>, <4 x i16>) + +define i64 @kmda(i64 %a, i64 %b) { +; CHECK-LABEL: kmda: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmda a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.kmda.i64.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.kmda.i64.i64(i64, i64) + +define i64 @v_kmda(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_kmda: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmda a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <2 x i32> @llvm.riscv.kmda.v2i32.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <2 x i32> %2 to i64 + ret i64 %3 +} + +declare <2 x i32> @llvm.riscv.kmda.v2i32.v4i16(<4 x i16>, <4 x i16>) + +define i64 @kmxda(i64 %a, i64 %b) { +; CHECK-LABEL: kmxda: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmxda a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.kmxda.i64.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.kmxda.i64.i64(i64, i64) + +define i64 @v_kmxda(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_kmxda: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmxda a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <2 x i32> @llvm.riscv.kmxda.v2i32.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <2 x i32> %2 to i64 + ret i64 %3 +} + +declare <2 x i32> @llvm.riscv.kmxda.v2i32.v4i16(<4 x i16>, <4 x i16>) + +define i64 @kmmac(i64 %t, i64 %a, i64 %b) { +; CHECK-LABEL: kmmac: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmac a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.kmmac.i64(i64 %t, i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.kmmac.i64(i64, i64, i64) + +define i64 @v_kmmac(i64 %t.coerce, i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_kmmac: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmac a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %t.coerce to <2 x i32> + %1 = bitcast i64 %a.coerce to <2 x i32> + %2 = bitcast i64 %b.coerce to <2 x i32> + %3 = tail call <2 x i32> @llvm.riscv.kmmac.v2i32(<2 x i32> %0, <2 x i32> %1, <2 x i32> %2) + %4 = bitcast <2 x i32> %3 to i64 + ret i64 %4 +} + +declare <2 x i32> @llvm.riscv.kmmac.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) + +define i64 @kmmac_u(i64 %t, i64 %a, i64 %b) { +; CHECK-LABEL: kmmac_u: +; 
CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmac.u a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.kmmac.u.i64(i64 %t, i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.kmmac.u.i64(i64, i64, i64) + +define i64 @v_kmmac_u(i64 %t.coerce, i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_kmmac_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmac.u a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %t.coerce to <2 x i32> + %1 = bitcast i64 %a.coerce to <2 x i32> + %2 = bitcast i64 %b.coerce to <2 x i32> + %3 = tail call <2 x i32> @llvm.riscv.kmmac.u.v2i32(<2 x i32> %0, <2 x i32> %1, <2 x i32> %2) + %4 = bitcast <2 x i32> %3 to i64 + ret i64 %4 +} + +declare <2 x i32> @llvm.riscv.kmmac.u.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) + +define i64 @kmmawb(i64 %t, i64 %a, i64 %b) { +; CHECK-LABEL: kmmawb: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmawb a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.kmmawb.i64.i64(i64 %t, i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.kmmawb.i64.i64(i64, i64, i64) + +define i64 @v_kmmawb(i64 %t.coerce, i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_kmmawb: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmawb a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %t.coerce to <2 x i32> + %1 = bitcast i64 %a.coerce to <2 x i32> + %2 = bitcast i64 %b.coerce to <4 x i16> + %3 = tail call <2 x i32> @llvm.riscv.kmmawb.v2i32.v4i16(<2 x i32> %0, <2 x i32> %1, <4 x i16> %2) + %4 = bitcast <2 x i32> %3 to i64 + ret i64 %4 +} + +declare <2 x i32> @llvm.riscv.kmmawb.v2i32.v4i16(<2 x i32>, <2 x i32>, <4 x i16>) + +define i64 @kmmawb_u(i64 %t, i64 %a, i64 %b) { +; CHECK-LABEL: kmmawb_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmawb.u a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.kmmawb.u.i64.i64(i64 %t, i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.kmmawb.u.i64.i64(i64, i64, i64) + +define i64 @v_kmmawb_u(i64 %t.coerce, i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_kmmawb_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmawb.u a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %t.coerce to <2 x i32> + %1 = bitcast i64 %a.coerce to <2 x i32> + %2 = bitcast i64 %b.coerce to <4 x i16> + %3 = tail call <2 x i32> @llvm.riscv.kmmawb.u.v2i32.v4i16(<2 x i32> %0, <2 x i32> %1, <4 x i16> %2) + %4 = bitcast <2 x i32> %3 to i64 + ret i64 %4 +} + +declare <2 x i32> @llvm.riscv.kmmawb.u.v2i32.v4i16(<2 x i32>, <2 x i32>, <4 x i16>) + +define i64 @kmmawb2(i64 %t, i64 %a, i64 %b) { +; CHECK-LABEL: kmmawb2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmawb2 a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.kmmawb2.i64.i64(i64 %t, i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.kmmawb2.i64.i64(i64, i64, i64) + +define i64 @v_kmmawb2(i64 %t.coerce, i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_kmmawb2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmawb2 a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %t.coerce to <2 x i32> + %1 = bitcast i64 %a.coerce to <2 x i32> + %2 = bitcast i64 %b.coerce to <4 x i16> + %3 = tail call <2 x i32> @llvm.riscv.kmmawb2.v2i32.v4i16(<2 x i32> %0, <2 x i32> %1, <4 x i16> %2) + %4 = bitcast <2 x i32> %3 to i64 + ret i64 %4 +} + +declare <2 x i32> @llvm.riscv.kmmawb2.v2i32.v4i16(<2 x i32>, <2 x i32>, <4 x i16>) + +define i64 @kmmawb2_u(i64 %t, i64 %a, i64 %b) { +; CHECK-LABEL: kmmawb2_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmawb2.u a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 
@llvm.riscv.kmmawb2.u.i64.i64(i64 %t, i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.kmmawb2.u.i64.i64(i64, i64, i64) + +define i64 @v_kmmawb2_u(i64 %t.coerce, i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_kmmawb2_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmawb2.u a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %t.coerce to <2 x i32> + %1 = bitcast i64 %a.coerce to <2 x i32> + %2 = bitcast i64 %b.coerce to <4 x i16> + %3 = tail call <2 x i32> @llvm.riscv.kmmawb2.u.v2i32.v4i16(<2 x i32> %0, <2 x i32> %1, <4 x i16> %2) + %4 = bitcast <2 x i32> %3 to i64 + ret i64 %4 +} + +declare <2 x i32> @llvm.riscv.kmmawb2.u.v2i32.v4i16(<2 x i32>, <2 x i32>, <4 x i16>) + +define i64 @kmmawt(i64 %t, i64 %a, i64 %b) { +; CHECK-LABEL: kmmawt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmawt a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.kmmawt.i64.i64(i64 %t, i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.kmmawt.i64.i64(i64, i64, i64) + +define i64 @v_kmmawt(i64 %t.coerce, i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_kmmawt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmawt a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %t.coerce to <2 x i32> + %1 = bitcast i64 %a.coerce to <2 x i32> + %2 = bitcast i64 %b.coerce to <4 x i16> + %3 = tail call <2 x i32> @llvm.riscv.kmmawt.v2i32.v4i16(<2 x i32> %0, <2 x i32> %1, <4 x i16> %2) + %4 = bitcast <2 x i32> %3 to i64 + ret i64 %4 +} + +declare <2 x i32> @llvm.riscv.kmmawt.v2i32.v4i16(<2 x i32>, <2 x i32>, <4 x i16>) + +define i64 @kmmawt_u(i64 %t, i64 %a, i64 %b) { +; CHECK-LABEL: kmmawt_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmawt.u a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.kmmawt.u.i64.i64(i64 %t, i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.kmmawt.u.i64.i64(i64, i64, i64) + +define i64 @v_kmmawt_u(i64 %t.coerce, i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_kmmawt_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmawt.u a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %t.coerce to <2 x i32> + %1 = bitcast i64 %a.coerce to <2 x i32> + %2 = bitcast i64 %b.coerce to <4 x i16> + %3 = tail call <2 x i32> @llvm.riscv.kmmawt.u.v2i32.v4i16(<2 x i32> %0, <2 x i32> %1, <4 x i16> %2) + %4 = bitcast <2 x i32> %3 to i64 + ret i64 %4 +} + +declare <2 x i32> @llvm.riscv.kmmawt.u.v2i32.v4i16(<2 x i32>, <2 x i32>, <4 x i16>) + +define i64 @kmmawt2(i64 %t, i64 %a, i64 %b) { +; CHECK-LABEL: kmmawt2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmawt2 a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.kmmawt2.i64.i64(i64 %t, i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.kmmawt2.i64.i64(i64, i64, i64) + +define i64 @v_kmmawt2(i64 %t.coerce, i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_kmmawt2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmawt2 a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %t.coerce to <2 x i32> + %1 = bitcast i64 %a.coerce to <2 x i32> + %2 = bitcast i64 %b.coerce to <4 x i16> + %3 = tail call <2 x i32> @llvm.riscv.kmmawt2.v2i32.v4i16(<2 x i32> %0, <2 x i32> %1, <4 x i16> %2) + %4 = bitcast <2 x i32> %3 to i64 + ret i64 %4 +} + +declare <2 x i32> @llvm.riscv.kmmawt2.v2i32.v4i16(<2 x i32>, <2 x i32>, <4 x i16>) + +define i64 @kmmawt2_u(i64 %t, i64 %a, i64 %b) { +; CHECK-LABEL: kmmawt2_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmawt2.u a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.kmmawt2.u.i64.i64(i64 %t, i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 
@llvm.riscv.kmmawt2.u.i64.i64(i64, i64, i64) + +define i64 @v_kmmawt2_u(i64 %t.coerce, i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_kmmawt2_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmawt2.u a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %t.coerce to <2 x i32> + %1 = bitcast i64 %a.coerce to <2 x i32> + %2 = bitcast i64 %b.coerce to <4 x i16> + %3 = tail call <2 x i32> @llvm.riscv.kmmawt2.u.v2i32.v4i16(<2 x i32> %0, <2 x i32> %1, <4 x i16> %2) + %4 = bitcast <2 x i32> %3 to i64 + ret i64 %4 +} + +declare <2 x i32> @llvm.riscv.kmmawt2.u.v2i32.v4i16(<2 x i32>, <2 x i32>, <4 x i16>) + +define i64 @kmmsb(i64 %t, i64 %a, i64 %b) { +; CHECK-LABEL: kmmsb: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmsb a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.kmmsb.i64(i64 %t, i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.kmmsb.i64(i64, i64, i64) + +define i64 @v_kmmsb(i64 %t.coerce, i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_kmmsb: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmsb a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %t.coerce to <2 x i32> + %1 = bitcast i64 %a.coerce to <2 x i32> + %2 = bitcast i64 %b.coerce to <2 x i32> + %3 = tail call <2 x i32> @llvm.riscv.kmmsb.v2i32(<2 x i32> %0, <2 x i32> %1, <2 x i32> %2) + %4 = bitcast <2 x i32> %3 to i64 + ret i64 %4 +} + +declare <2 x i32> @llvm.riscv.kmmsb.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) + +define i64 @kmmsb_u(i64 %t, i64 %a, i64 %b) { +; CHECK-LABEL: kmmsb_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmsb.u a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.kmmsb.u.i64(i64 %t, i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.kmmsb.u.i64(i64, i64, i64) + +define i64 @v_kmmsb_u(i64 %t.coerce, i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_kmmsb_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmsb.u a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %t.coerce to <2 x i32> + %1 = bitcast i64 %a.coerce to <2 x i32> + %2 = bitcast i64 %b.coerce to <2 x i32> + %3 = tail call <2 x i32> @llvm.riscv.kmmsb.u.v2i32(<2 x i32> %0, <2 x i32> %1, <2 x i32> %2) + %4 = bitcast <2 x i32> %3 to i64 + ret i64 %4 +} + +declare <2 x i32> @llvm.riscv.kmmsb.u.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) + +define i64 @kmmwb2(i64 %a, i64 %b) { +; CHECK-LABEL: kmmwb2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmwb2 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.kmmwb2.i64.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.kmmwb2.i64.i64(i64, i64) + +define i64 @v_kmmwb2(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_kmmwb2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmwb2 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <2 x i32> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <2 x i32> @llvm.riscv.kmmwb2.v2i32.v4i16(<2 x i32> %0, <4 x i16> %1) + %3 = bitcast <2 x i32> %2 to i64 + ret i64 %3 +} + +declare <2 x i32> @llvm.riscv.kmmwb2.v2i32.v4i16(<2 x i32>, <4 x i16>) + +define i64 @kmmwb2_u(i64 %a, i64 %b) { +; CHECK-LABEL: kmmwb2_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmwb2.u a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.kmmwb2.u.i64.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.kmmwb2.u.i64.i64(i64, i64) + +define i64 @v_kmmwb2_u(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_kmmwb2_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmwb2.u a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <2 x i32> + %1 = bitcast i64 
%b.coerce to <4 x i16> + %2 = tail call <2 x i32> @llvm.riscv.kmmwb2.u.v2i32.v4i16(<2 x i32> %0, <4 x i16> %1) + %3 = bitcast <2 x i32> %2 to i64 + ret i64 %3 +} + +declare <2 x i32> @llvm.riscv.kmmwb2.u.v2i32.v4i16(<2 x i32>, <4 x i16>) + +define i64 @kmmwt2(i64 %a, i64 %b) { +; CHECK-LABEL: kmmwt2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmwt2 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.kmmwt2.i64.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.kmmwt2.i64.i64(i64, i64) + +define i64 @v_kmmwt2(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_kmmwt2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmwt2 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <2 x i32> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <2 x i32> @llvm.riscv.kmmwt2.v2i32.v4i16(<2 x i32> %0, <4 x i16> %1) + %3 = bitcast <2 x i32> %2 to i64 + ret i64 %3 +} + +declare <2 x i32> @llvm.riscv.kmmwt2.v2i32.v4i16(<2 x i32>, <4 x i16>) + +define i64 @kmmwt2_u(i64 %a, i64 %b) { +; CHECK-LABEL: kmmwt2_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmwt2.u a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.kmmwt2.u.i64.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.kmmwt2.u.i64.i64(i64, i64) + +define i64 @v_kmmwt2_u(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_kmmwt2_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmmwt2.u a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <2 x i32> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <2 x i32> @llvm.riscv.kmmwt2.u.v2i32.v4i16(<2 x i32> %0, <4 x i16> %1) + %3 = bitcast <2 x i32> %2 to i64 + ret i64 %3 +} + +declare <2 x i32> @llvm.riscv.kmmwt2.u.v2i32.v4i16(<2 x i32>, <4 x i16>) + +define i64 @kmsda(i64 %t, i64 %a, i64 %b) { +; CHECK-LABEL: kmsda: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmsda a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.kmsda.i64.i64(i64 %t, i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.kmsda.i64.i64(i64, i64, i64) + +define i64 @v_kmsda(i64 %t.coerce, i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_kmsda: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmsda a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %t.coerce to <2 x i32> + %1 = bitcast i64 %a.coerce to <4 x i16> + %2 = bitcast i64 %b.coerce to <4 x i16> + %3 = tail call <2 x i32> @llvm.riscv.kmsda.v2i32.v4i16(<2 x i32> %0, <4 x i16> %1, <4 x i16> %2) + %4 = bitcast <2 x i32> %3 to i64 + ret i64 %4 +} + +declare <2 x i32> @llvm.riscv.kmsda.v2i32.v4i16(<2 x i32>, <4 x i16>, <4 x i16>) + +define i64 @kmsxda(i64 %t, i64 %a, i64 %b) { +; CHECK-LABEL: kmsxda: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmsxda a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.kmsxda.i64.i64(i64 %t, i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.kmsxda.i64.i64(i64, i64, i64) + +define i64 @v_kmsxda(i64 %t.coerce, i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_kmsxda: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kmsxda a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %t.coerce to <2 x i32> + %1 = bitcast i64 %a.coerce to <4 x i16> + %2 = bitcast i64 %b.coerce to <4 x i16> + %3 = tail call <2 x i32> @llvm.riscv.kmsxda.v2i32.v4i16(<2 x i32> %0, <4 x i16> %1, <4 x i16> %2) + %4 = bitcast <2 x i32> %3 to i64 + ret i64 %4 +} + +declare <2 x i32> @llvm.riscv.kmsxda.v2i32.v4i16(<2 x i32>, <4 x i16>, <4 x i16>) + +define i64 @ksllw(i64 %a, i32 signext %b) { +; CHECK-LABEL: ksllw: +; CHECK: # %bb.0: # %entry +; 
CHECK-NEXT: slli a1, a1, 32 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: ksllw a0, a0, a1 +; CHECK-NEXT: ret +entry: + %conv = zext i32 %b to i64 + %0 = tail call i64 @llvm.riscv.ksllw.i64(i64 %a, i64 %conv) + ret i64 %0 +} + +declare i64 @llvm.riscv.ksllw.i64(i64, i64) + +define i64 @ksll8(i64 %a, i32 signext %b) { +; CHECK-LABEL: ksll8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a1, a1, 32 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: ksll8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %conv = zext i32 %b to i64 + %0 = tail call i64 @llvm.riscv.ksll8.i64.i64(i64 %a, i64 %conv) + ret i64 %0 +} + +declare i64 @llvm.riscv.ksll8.i64.i64(i64, i64) + +define i64 @v_ksll8(i64 %a.coerce, i32 signext %b) { +; CHECK-LABEL: v_ksll8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a1, a1, 32 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: ksll8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <8 x i8> + %conv = zext i32 %b to i64 + %1 = tail call <8 x i8> @llvm.riscv.ksll8.v8i8.i64(<8 x i8> %0, i64 %conv) + %2 = bitcast <8 x i8> %1 to i64 + ret i64 %2 +} + +declare <8 x i8> @llvm.riscv.ksll8.v8i8.i64(<8 x i8>, i64) + +define i64 @ksll16(i64 %a, i32 signext %b) { +; CHECK-LABEL: ksll16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a1, a1, 32 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: ksll16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %conv = zext i32 %b to i64 + %0 = tail call i64 @llvm.riscv.ksll16.i64.i64(i64 %a, i64 %conv) + ret i64 %0 +} + +declare i64 @llvm.riscv.ksll16.i64.i64(i64, i64) + +define i64 @v_ksll16(i64 %a.coerce, i32 signext %b) { +; CHECK-LABEL: v_ksll16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a1, a1, 32 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: ksll16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %conv = zext i32 %b to i64 + %1 = tail call <4 x i16> @llvm.riscv.ksll16.v4i16.i64(<4 x i16> %0, i64 %conv) + %2 = bitcast <4 x i16> %1 to i64 + ret i64 %2 +} + +declare <4 x i16> @llvm.riscv.ksll16.v4i16.i64(<4 x i16>, i64) + +define i64 @kslra8(i64 %a, i32 signext %b) { +; CHECK-LABEL: kslra8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kslra8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %conv = sext i32 %b to i64 + %0 = tail call i64 @llvm.riscv.kslra8.i64.i64(i64 %a, i64 %conv) + ret i64 %0 +} + +declare i64 @llvm.riscv.kslra8.i64.i64(i64, i64) + +define i64 @v_kslra8(i64 %a.coerce, i32 signext %b) { +; CHECK-LABEL: v_kslra8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kslra8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <8 x i8> + %conv = sext i32 %b to i64 + %1 = tail call <8 x i8> @llvm.riscv.kslra8.v8i8.i64(<8 x i8> %0, i64 %conv) + %2 = bitcast <8 x i8> %1 to i64 + ret i64 %2 +} + +declare <8 x i8> @llvm.riscv.kslra8.v8i8.i64(<8 x i8>, i64) + +define i64 @kslra8_u(i64 %a, i32 signext %b) { +; CHECK-LABEL: kslra8_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kslra8.u a0, a0, a1 +; CHECK-NEXT: ret +entry: + %conv = sext i32 %b to i64 + %0 = tail call i64 @llvm.riscv.kslra8.u.i64.i64(i64 %a, i64 %conv) + ret i64 %0 +} + +declare i64 @llvm.riscv.kslra8.u.i64.i64(i64, i64) + +define i64 @v_kslra8_u(i64 %a.coerce, i32 signext %b) { +; CHECK-LABEL: v_kslra8_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kslra8.u a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <8 x i8> + %conv = sext i32 %b to i64 + %1 = tail call <8 x i8> @llvm.riscv.kslra8.u.v8i8.i64(<8 x i8> %0, i64 %conv) + %2 = bitcast <8 x i8> %1 to i64 + ret i64 %2 +} + +declare <8 x i8> @llvm.riscv.kslra8.u.v8i8.i64(<8 x 
i8>, i64) + +define i64 @kslra16(i64 %a, i32 signext %b) { +; CHECK-LABEL: kslra16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kslra16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %conv = sext i32 %b to i64 + %0 = tail call i64 @llvm.riscv.kslra16.i64.i64(i64 %a, i64 %conv) + ret i64 %0 +} + +declare i64 @llvm.riscv.kslra16.i64.i64(i64, i64) + +define i64 @v_kslra16(i64 %a.coerce, i32 signext %b) { +; CHECK-LABEL: v_kslra16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kslra16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %conv = sext i32 %b to i64 + %1 = tail call <4 x i16> @llvm.riscv.kslra16.v4i16.i64(<4 x i16> %0, i64 %conv) + %2 = bitcast <4 x i16> %1 to i64 + ret i64 %2 +} + +declare <4 x i16> @llvm.riscv.kslra16.v4i16.i64(<4 x i16>, i64) + +define i64 @kslra16_u(i64 %a, i32 signext %b) { +; CHECK-LABEL: kslra16_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kslra16.u a0, a0, a1 +; CHECK-NEXT: ret +entry: + %conv = sext i32 %b to i64 + %0 = tail call i64 @llvm.riscv.kslra16.u.i64.i64(i64 %a, i64 %conv) + ret i64 %0 +} + +declare i64 @llvm.riscv.kslra16.u.i64.i64(i64, i64) + +define i64 @v_kslra16_u(i64 %a.coerce, i32 signext %b) { +; CHECK-LABEL: v_kslra16_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kslra16.u a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %conv = sext i32 %b to i64 + %1 = tail call <4 x i16> @llvm.riscv.kslra16.u.v4i16.i64(<4 x i16> %0, i64 %conv) + %2 = bitcast <4 x i16> %1 to i64 + ret i64 %2 +} + +declare <4 x i16> @llvm.riscv.kslra16.u.v4i16.i64(<4 x i16>, i64) + +define i64 @kslraw(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: kslraw: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kslraw a0, a0, a1 +; CHECK-NEXT: ret +entry: + %conv = sext i32 %a to i64 + %conv1 = sext i32 %b to i64 + %0 = tail call i64 @llvm.riscv.kslraw.i64(i64 %conv, i64 %conv1) + ret i64 %0 +} + +declare i64 @llvm.riscv.kslraw.i64(i64, i64) + +define i64 @kslraw_u(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: kslraw_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kslraw.u a0, a0, a1 +; CHECK-NEXT: ret +entry: + %conv = sext i32 %a to i64 + %conv1 = sext i32 %b to i64 + %0 = tail call i64 @llvm.riscv.kslraw.u.i64(i64 %conv, i64 %conv1) + ret i64 %0 +} + +declare i64 @llvm.riscv.kslraw.u.i64(i64, i64) + +define i64 @kstas16(i64 %a, i64 %b) { +; CHECK-LABEL: kstas16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kstas16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.kstas16.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.kstas16.i64(i64, i64) + +define i64 @v_kstas16(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_kstas16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kstas16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <4 x i16> @llvm.riscv.kstas16.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <4 x i16> %2 to i64 + ret i64 %3 +} + +declare <4 x i16> @llvm.riscv.kstas16.v4i16(<4 x i16>, <4 x i16>) + +define i64 @kstsa16(i64 %a, i64 %b) { +; CHECK-LABEL: kstsa16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kstsa16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.kstsa16.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.kstsa16.i64(i64, i64) + +define i64 @v_kstsa16(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_kstsa16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kstsa16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast 
i64 %b.coerce to <4 x i16> + %2 = tail call <4 x i16> @llvm.riscv.kstsa16.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <4 x i16> %2 to i64 + ret i64 %3 +} + +declare <4 x i16> @llvm.riscv.kstsa16.v4i16(<4 x i16>, <4 x i16>) + +define i64 @ksub8(i64 %a, i64 %b) { +; CHECK-LABEL: ksub8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ksub8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.ksub8.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.ksub8.i64(i64, i64) + +define i64 @v_ksub8(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_ksub8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ksub8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <8 x i8> + %1 = bitcast i64 %b.coerce to <8 x i8> + %2 = tail call <8 x i8> @llvm.riscv.ksub8.v8i8(<8 x i8> %0, <8 x i8> %1) + %3 = bitcast <8 x i8> %2 to i64 + ret i64 %3 +} + +declare <8 x i8> @llvm.riscv.ksub8.v8i8(<8 x i8>, <8 x i8>) + +define i64 @ksub16(i64 %a, i64 %b) { +; CHECK-LABEL: ksub16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ksub16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.ksub16.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.ksub16.i64(i64, i64) + +define i64 @v_ksub16(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_ksub16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ksub16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <4 x i16> @llvm.riscv.ksub16.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <4 x i16> %2 to i64 + ret i64 %3 +} + +declare <4 x i16> @llvm.riscv.ksub16.v4i16(<4 x i16>, <4 x i16>) + +define i64 @ksubh(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: ksubh: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ksubh a0, a0, a1 +; CHECK-NEXT: ret +entry: + %conv = sext i32 %a to i64 + %conv1 = sext i32 %b to i64 + %0 = tail call i64 @llvm.riscv.ksubh.i64(i64 %conv, i64 %conv1) + ret i64 %0 +} + +declare i64 @llvm.riscv.ksubh.i64(i64, i64) + +define i64 @ksubw(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: ksubw: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ksubw a0, a0, a1 +; CHECK-NEXT: ret +entry: + %conv = sext i32 %a to i64 + %conv1 = sext i32 %b to i64 + %0 = tail call i64 @llvm.riscv.ksubw.i64(i64 %conv, i64 %conv1) + ret i64 %0 +} + +declare i64 @llvm.riscv.ksubw.i64(i64, i64) + +define i64 @kwmmul(i64 %a, i64 %b) { +; CHECK-LABEL: kwmmul: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kwmmul a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.kwmmul.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.kwmmul.i64(i64, i64) + +define i64 @v_kwmmul(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_kwmmul: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kwmmul a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <2 x i32> + %1 = bitcast i64 %b.coerce to <2 x i32> + %2 = tail call <2 x i32> @llvm.riscv.kwmmul.v2i32(<2 x i32> %0, <2 x i32> %1) + %3 = bitcast <2 x i32> %2 to i64 + ret i64 %3 +} + +declare <2 x i32> @llvm.riscv.kwmmul.v2i32(<2 x i32>, <2 x i32>) + +define i64 @kwmmul_u(i64 %a, i64 %b) { +; CHECK-LABEL: kwmmul_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kwmmul.u a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.kwmmul.u.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.kwmmul.u.i64(i64, i64) + +define i64 @v_kwmmul_u(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_kwmmul_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: kwmmul.u a0, a0, a1 +; CHECK-NEXT: ret 
+entry: + %0 = bitcast i64 %a.coerce to <2 x i32> + %1 = bitcast i64 %b.coerce to <2 x i32> + %2 = tail call <2 x i32> @llvm.riscv.kwmmul.u.v2i32(<2 x i32> %0, <2 x i32> %1) + %3 = bitcast <2 x i32> %2 to i64 + ret i64 %3 +} + +declare <2 x i32> @llvm.riscv.kwmmul.u.v2i32(<2 x i32>, <2 x i32>) + +define i64 @maxw(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: maxw: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: maxw a0, a0, a1 +; CHECK-NEXT: ret +entry: + %conv = sext i32 %a to i64 + %conv1 = sext i32 %b to i64 + %0 = tail call i64 @llvm.riscv.maxw.i64(i64 %conv, i64 %conv1) + ret i64 %0 +} + +declare i64 @llvm.riscv.maxw.i64(i64, i64) + +define i64 @minw(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: minw: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: minw a0, a0, a1 +; CHECK-NEXT: ret +entry: + %conv = sext i32 %a to i64 + %conv1 = sext i32 %b to i64 + %0 = tail call i64 @llvm.riscv.minw.i64(i64 %conv, i64 %conv1) + ret i64 %0 +} + +declare i64 @llvm.riscv.minw.i64(i64, i64) + +define i64 @pbsad(i64 %a, i64 %b) { +; CHECK-LABEL: pbsad: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pbsad a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.pbsad.i64.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.pbsad.i64.i64(i64, i64) + +define i64 @v_pbsad(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_pbsad: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pbsad a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <8 x i8> + %1 = bitcast i64 %b.coerce to <8 x i8> + %2 = tail call i64 @llvm.riscv.pbsad.i64.v8i8(<8 x i8> %0, <8 x i8> %1) + ret i64 %2 +} + +declare i64 @llvm.riscv.pbsad.i64.v8i8(<8 x i8>, <8 x i8>) + +define i64 @pbsada(i64 %t, i64 %a, i64 %b) { +; CHECK-LABEL: pbsada: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pbsada a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.pbsada.i64.i64(i64 %t, i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.pbsada.i64.i64(i64, i64, i64) + +define i64 @v_pbsada(i64 %t, i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_pbsada: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pbsada a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <8 x i8> + %1 = bitcast i64 %b.coerce to <8 x i8> + %2 = tail call i64 @llvm.riscv.pbsada.i64.v8i8(i64 %t, <8 x i8> %0, <8 x i8> %1) + ret i64 %2 +} + +declare i64 @llvm.riscv.pbsada.i64.v8i8(i64, <8 x i8>, <8 x i8>) + +define i64 @pkbb16(i64 %a, i64 %b) { +; CHECK-LABEL: pkbb16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pkbb16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.pkbb16.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.pkbb16.i64(i64, i64) + +define i64 @v_pkbb16(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_pkbb16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pkbb16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <4 x i16> @llvm.riscv.pkbb16.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <4 x i16> %2 to i64 + ret i64 %3 +} + +declare <4 x i16> @llvm.riscv.pkbb16.v4i16(<4 x i16>, <4 x i16>) + +define i64 @pkbt16(i64 %a, i64 %b) { +; CHECK-LABEL: pkbt16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pkbt16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.pkbt16.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.pkbt16.i64(i64, i64) + +define i64 @v_pkbt16(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_pkbt16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pkbt16 a0, a0, a1 +; 
CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <4 x i16> @llvm.riscv.pkbt16.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <4 x i16> %2 to i64 + ret i64 %3 +} + +declare <4 x i16> @llvm.riscv.pkbt16.v4i16(<4 x i16>, <4 x i16>) + +define i64 @pktb16(i64 %a, i64 %b) { +; CHECK-LABEL: pktb16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pktb16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.pktb16.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.pktb16.i64(i64, i64) + +define i64 @v_pktb16(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_pktb16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pktb16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <4 x i16> @llvm.riscv.pktb16.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <4 x i16> %2 to i64 + ret i64 %3 +} + +declare <4 x i16> @llvm.riscv.pktb16.v4i16(<4 x i16>, <4 x i16>) + +define i64 @pktt16(i64 %a, i64 %b) { +; CHECK-LABEL: pktt16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pktt16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.pktt16.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.pktt16.i64(i64, i64) + +define i64 @v_pktt16(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_pktt16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pktt16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <4 x i16> @llvm.riscv.pktt16.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <4 x i16> %2 to i64 + ret i64 %3 +} + +declare <4 x i16> @llvm.riscv.pktt16.v4i16(<4 x i16>, <4 x i16>) + +define i64 @radd8(i64 %a, i64 %b) { +; CHECK-LABEL: radd8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: radd8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.radd8.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.radd8.i64(i64, i64) + +define i64 @v_radd8(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_radd8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: radd8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <8 x i8> + %1 = bitcast i64 %b.coerce to <8 x i8> + %2 = tail call <8 x i8> @llvm.riscv.radd8.v8i8(<8 x i8> %0, <8 x i8> %1) + %3 = bitcast <8 x i8> %2 to i64 + ret i64 %3 +} + +declare <8 x i8> @llvm.riscv.radd8.v8i8(<8 x i8>, <8 x i8>) + +define i64 @radd16(i64 %a, i64 %b) { +; CHECK-LABEL: radd16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: radd16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.radd16.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.radd16.i64(i64, i64) + +define i64 @v_radd16(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_radd16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: radd16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <4 x i16> @llvm.riscv.radd16.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <4 x i16> %2 to i64 + ret i64 %3 +} + +declare <4 x i16> @llvm.riscv.radd16.v4i16(<4 x i16>, <4 x i16>) + +define i64 @raddw(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: raddw: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: raddw a0, a0, a1 +; CHECK-NEXT: ret +entry: + %conv = sext i32 %a to i64 + %conv1 = sext i32 %b to i64 + %0 = tail call i64 @llvm.riscv.raddw.i64(i64 %conv, i64 %conv1) + ret i64 %0 +} + +declare i64 @llvm.riscv.raddw.i64(i64, i64) + +define i64 @rcras16(i64 %a, i64 %b) { +; CHECK-LABEL: rcras16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: rcras16 a0, a0,
a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.rcras16.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.rcras16.i64(i64, i64) + +define i64 @v_rcras16(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_rcras16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: rcras16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <4 x i16> @llvm.riscv.rcras16.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <4 x i16> %2 to i64 + ret i64 %3 +} + +declare <4 x i16> @llvm.riscv.rcras16.v4i16(<4 x i16>, <4 x i16>) + +define i64 @rcrsa16(i64 %a, i64 %b) { +; CHECK-LABEL: rcrsa16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: rcrsa16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.rcrsa16.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.rcrsa16.i64(i64, i64) + +define i64 @v_rcrsa16(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_rcrsa16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: rcrsa16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <4 x i16> @llvm.riscv.rcrsa16.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <4 x i16> %2 to i64 + ret i64 %3 +} + +declare <4 x i16> @llvm.riscv.rcrsa16.v4i16(<4 x i16>, <4 x i16>) + +define i64 @rstas16(i64 %a, i64 %b) { +; CHECK-LABEL: rstas16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: rstas16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.rstas16.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.rstas16.i64(i64, i64) + +define i64 @v_rstas16(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_rstas16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: rstas16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <4 x i16> @llvm.riscv.rstas16.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <4 x i16> %2 to i64 + ret i64 %3 +} + +declare <4 x i16> @llvm.riscv.rstas16.v4i16(<4 x i16>, <4 x i16>) + +define i64 @rstsa16(i64 %a, i64 %b) { +; CHECK-LABEL: rstsa16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: rstsa16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.rstsa16.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.rstsa16.i64(i64, i64) + +define i64 @v_rstsa16(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_rstsa16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: rstsa16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <4 x i16> @llvm.riscv.rstsa16.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <4 x i16> %2 to i64 + ret i64 %3 +} + +declare <4 x i16> @llvm.riscv.rstsa16.v4i16(<4 x i16>, <4 x i16>) + +define i64 @rsub8(i64 %a, i64 %b) { +; CHECK-LABEL: rsub8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: rsub8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.rsub8.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.rsub8.i64(i64, i64) + +define i64 @v_rsub8(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_rsub8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: rsub8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <8 x i8> + %1 = bitcast i64 %b.coerce to <8 x i8> + %2 = tail call <8 x i8> @llvm.riscv.rsub8.v8i8(<8 x i8> %0, <8 x i8> %1) + %3 = bitcast <8 x i8> %2 to i64 + ret i64 %3 +} + +declare <8 x i8> @llvm.riscv.rsub8.v8i8(<8 x i8>, <8 x i8>) + +define i64
@rsub16(i64 %a, i64 %b) { +; CHECK-LABEL: rsub16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: rsub16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.rsub16.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.rsub16.i64(i64, i64) + +define i64 @v_rsub16(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_rsub16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: rsub16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <4 x i16> @llvm.riscv.rsub16.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <4 x i16> %2 to i64 + ret i64 %3 +} + +declare <4 x i16> @llvm.riscv.rsub16.v4i16(<4 x i16>, <4 x i16>) + +define i64 @rsubw(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: rsubw: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: rsubw a0, a0, a1 +; CHECK-NEXT: ret +entry: + %conv = sext i32 %a to i64 + %conv1 = sext i32 %b to i64 + %0 = tail call i64 @llvm.riscv.rsubw.i64(i64 %conv, i64 %conv1) + ret i64 %0 +} + +declare i64 @llvm.riscv.rsubw.i64(i64, i64) + +define i64 @sclip8(i64 %a) { +; CHECK-LABEL: sclip8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sclip8 a0, a0, 7 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.sclip8.i64.i64(i64 %a, i64 7) + ret i64 %0 +} + +declare i64 @llvm.riscv.sclip8.i64.i64(i64, i64) + +define i64 @v_sclip8(i64 %a.coerce) { +; CHECK-LABEL: v_sclip8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sclip8 a0, a0, 7 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <8 x i8> + %1 = tail call <8 x i8> @llvm.riscv.sclip8.v8i8.i64(<8 x i8> %0, i64 7) + %2 = bitcast <8 x i8> %1 to i64 + ret i64 %2 +} + +declare <8 x i8> @llvm.riscv.sclip8.v8i8.i64(<8 x i8>, i64) + +define i64 @sclip16(i64 %a) { +; CHECK-LABEL: sclip16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sclip16 a0, a0, 8 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.sclip16.i64.i64(i64 %a, i64 8) + ret i64 %0 +} + +declare i64 @llvm.riscv.sclip16.i64.i64(i64, i64) + +define i64 @v_sclip16(i64 %a.coerce) { +; CHECK-LABEL: v_sclip16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sclip16 a0, a0, 8 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = tail call <4 x i16> @llvm.riscv.sclip16.v4i16.i64(<4 x i16> %0, i64 8) + %2 = bitcast <4 x i16> %1 to i64 + ret i64 %2 +} + +declare <4 x i16> @llvm.riscv.sclip16.v4i16.i64(<4 x i16>, i64) + +define i64 @sclip32(i64 %a) { +; CHECK-LABEL: sclip32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sclip32 a0, a0, 9 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.sclip32.i64.i64(i64 %a, i64 9) + ret i64 %0 +} + +declare i64 @llvm.riscv.sclip32.i64.i64(i64, i64) + +define i64 @v_sclip32(i64 %a.coerce) { +; CHECK-LABEL: v_sclip32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi a1, zero, 9 +; CHECK-NEXT: sclip32 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <2 x i32> + %1 = tail call <2 x i32> @llvm.riscv.sclip32.v2i32.i64(<2 x i32> %0, i64 9) + %2 = bitcast <2 x i32> %1 to i64 + ret i64 %2 +} + +declare <2 x i32> @llvm.riscv.sclip32.v2i32.i64(<2 x i32>, i64) + +define i64 @scmple8(i64 %a, i64 %b) { +; CHECK-LABEL: scmple8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: scmple8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.scmple8.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.scmple8.i64(i64, i64) + +define i64 @v_scmple8(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_scmple8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: scmple8 a0, a0, a1 +; CHECK-NEXT: ret 
+entry: + %0 = bitcast i64 %a.coerce to <8 x i8> + %1 = bitcast i64 %b.coerce to <8 x i8> + %2 = tail call <8 x i8> @llvm.riscv.scmple8.v8i8(<8 x i8> %0, <8 x i8> %1) + %3 = bitcast <8 x i8> %2 to i64 + ret i64 %3 +} + +declare <8 x i8> @llvm.riscv.scmple8.v8i8(<8 x i8>, <8 x i8>) + +define i64 @scmple16(i64 %a, i64 %b) { +; CHECK-LABEL: scmple16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: scmple16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.scmple16.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.scmple16.i64(i64, i64) + +define i64 @v_scmple16(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_scmple16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: scmple16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <4 x i16> @llvm.riscv.scmple16.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <4 x i16> %2 to i64 + ret i64 %3 +} + +declare <4 x i16> @llvm.riscv.scmple16.v4i16(<4 x i16>, <4 x i16>) + +define i64 @scmplt8(i64 %a, i64 %b) { +; CHECK-LABEL: scmplt8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: scmplt8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.scmplt8.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.scmplt8.i64(i64, i64) + +define i64 @v_scmplt8(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_scmplt8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: scmplt8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <8 x i8> + %1 = bitcast i64 %b.coerce to <8 x i8> + %2 = tail call <8 x i8> @llvm.riscv.scmplt8.v8i8(<8 x i8> %0, <8 x i8> %1) + %3 = bitcast <8 x i8> %2 to i64 + ret i64 %3 +} + +declare <8 x i8> @llvm.riscv.scmplt8.v8i8(<8 x i8>, <8 x i8>) + +define i64 @scmplt16(i64 %a, i64 %b) { +; CHECK-LABEL: scmplt16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: scmplt16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.scmplt16.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.scmplt16.i64(i64, i64) + +define i64 @v_scmplt16(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_scmplt16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: scmplt16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <4 x i16> @llvm.riscv.scmplt16.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <4 x i16> %2 to i64 + ret i64 %3 +} + +declare <4 x i16> @llvm.riscv.scmplt16.v4i16(<4 x i16>, <4 x i16>) + +define i64 @sll8(i64 %a, i32 signext %b) { +; CHECK-LABEL: sll8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a1, a1, 32 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: sll8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %conv = zext i32 %b to i64 + %0 = tail call i64 @llvm.riscv.sll8.i64.i64(i64 %a, i64 %conv) + ret i64 %0 +} + +declare i64 @llvm.riscv.sll8.i64.i64(i64, i64) + +define i64 @v_sll8(i64 %a.coerce, i32 signext %b) { +; CHECK-LABEL: v_sll8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a1, a1, 32 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: sll8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <8 x i8> + %conv = zext i32 %b to i64 + %1 = tail call <8 x i8> @llvm.riscv.sll8.v8i8.i64(<8 x i8> %0, i64 %conv) + %2 = bitcast <8 x i8> %1 to i64 + ret i64 %2 +} + +declare <8 x i8> @llvm.riscv.sll8.v8i8.i64(<8 x i8>, i64) + +define i64 @sll16(i64 %a, i32 signext %b) { +; CHECK-LABEL: sll16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a1, a1, 32 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: sll16 a0, 
a0, a1 +; CHECK-NEXT: ret +entry: + %conv = zext i32 %b to i64 + %0 = tail call i64 @llvm.riscv.sll16.i64.i64(i64 %a, i64 %conv) + ret i64 %0 +} + +declare i64 @llvm.riscv.sll16.i64.i64(i64, i64) + +define i64 @v_sll16(i64 %a.coerce, i32 signext %b) { +; CHECK-LABEL: v_sll16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a1, a1, 32 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: sll16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %conv = zext i32 %b to i64 + %1 = tail call <4 x i16> @llvm.riscv.sll16.v4i16.i64(<4 x i16> %0, i64 %conv) + %2 = bitcast <4 x i16> %1 to i64 + ret i64 %2 +} + +declare <4 x i16> @llvm.riscv.sll16.v4i16.i64(<4 x i16>, i64) + +define i64 @smaqa(i64 %t, i64 %a, i64 %b) { +; CHECK-LABEL: smaqa: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smaqa a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.smaqa.i64.i64(i64 %t, i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.smaqa.i64.i64(i64, i64, i64) + +define i64 @v_smaqa(i64 %t.coerce, i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_smaqa: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smaqa a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %t.coerce to <2 x i32> + %1 = bitcast i64 %a.coerce to <8 x i8> + %2 = bitcast i64 %b.coerce to <8 x i8> + %3 = tail call <2 x i32> @llvm.riscv.smaqa.v2i32.v8i8(<2 x i32> %0, <8 x i8> %1, <8 x i8> %2) + %4 = bitcast <2 x i32> %3 to i64 + ret i64 %4 +} + +declare <2 x i32> @llvm.riscv.smaqa.v2i32.v8i8(<2 x i32>, <8 x i8>, <8 x i8>) + +define i64 @smaqa_su(i64 %t, i64 %a, i64 %b) { +; CHECK-LABEL: smaqa_su: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smaqa.su a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.smaqa.su.i64.i64(i64 %t, i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.smaqa.su.i64.i64(i64, i64, i64) + +define i64 @v_smaqa_su(i64 %t.coerce, i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_smaqa_su: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smaqa.su a0, a1, a2 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %t.coerce to <2 x i32> + %1 = bitcast i64 %a.coerce to <8 x i8> + %2 = bitcast i64 %b.coerce to <8 x i8> + %3 = tail call <2 x i32> @llvm.riscv.smaqa.su.v2i32.v8i8(<2 x i32> %0, <8 x i8> %1, <8 x i8> %2) + %4 = bitcast <2 x i32> %3 to i64 + ret i64 %4 +} + +declare <2 x i32> @llvm.riscv.smaqa.su.v2i32.v8i8(<2 x i32>, <8 x i8>, <8 x i8>) + +define i64 @smax8(i64 %a, i64 %b) { +; CHECK-LABEL: smax8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smax8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.smax8.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.smax8.i64(i64, i64) + +define i64 @v_smax8(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_smax8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smax8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <8 x i8> + %1 = bitcast i64 %b.coerce to <8 x i8> + %2 = tail call <8 x i8> @llvm.riscv.smax8.v8i8(<8 x i8> %0, <8 x i8> %1) + %3 = bitcast <8 x i8> %2 to i64 + ret i64 %3 +} + +declare <8 x i8> @llvm.riscv.smax8.v8i8(<8 x i8>, <8 x i8>) + +define i64 @smax16(i64 %a, i64 %b) { +; CHECK-LABEL: smax16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smax16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.smax16.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.smax16.i64(i64, i64) + +define i64 @v_smax16(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_smax16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smax16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = 
bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <4 x i16> @llvm.riscv.smax16.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <4 x i16> %2 to i64 + ret i64 %3 +} + +declare <4 x i16> @llvm.riscv.smax16.v4i16(<4 x i16>, <4 x i16>) + +define i64 @smbb16(i64 %a, i64 %b) { +; CHECK-LABEL: smbb16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smbb16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.smbb16.i64.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.smbb16.i64.i64(i64, i64) + +define i64 @v_smbb16(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_smbb16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smbb16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <2 x i32> @llvm.riscv.smbb16.v2i32.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <2 x i32> %2 to i64 + ret i64 %3 +} + +declare <2 x i32> @llvm.riscv.smbb16.v2i32.v4i16(<4 x i16>, <4 x i16>) + +define i64 @smbt16(i64 %a, i64 %b) { +; CHECK-LABEL: smbt16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smbt16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.smbt16.i64.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.smbt16.i64.i64(i64, i64) + +define i64 @v_smbt16(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_smbt16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smbt16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <2 x i32> @llvm.riscv.smbt16.v2i32.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <2 x i32> %2 to i64 + ret i64 %3 +} + +declare <2 x i32> @llvm.riscv.smbt16.v2i32.v4i16(<4 x i16>, <4 x i16>) + +define i64 @smtt16(i64 %a, i64 %b) { +; CHECK-LABEL: smtt16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smtt16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.smtt16.i64.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.smtt16.i64.i64(i64, i64) + +define i64 @v_smtt16(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_smtt16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smtt16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <2 x i32> @llvm.riscv.smtt16.v2i32.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <2 x i32> %2 to i64 + ret i64 %3 +} + +declare <2 x i32> @llvm.riscv.smtt16.v2i32.v4i16(<4 x i16>, <4 x i16>) + +define i64 @smds(i64 %a, i64 %b) { +; CHECK-LABEL: smds: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smds a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.smds.i64.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.smds.i64.i64(i64, i64) + +define i64 @v_smds(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_smds: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smds a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <2 x i32> @llvm.riscv.smds.v2i32.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <2 x i32> %2 to i64 + ret i64 %3 +} + +declare <2 x i32> @llvm.riscv.smds.v2i32.v4i16(<4 x i16>, <4 x i16>) + +define i64 @smdrs(i64 %a, i64 %b) { +; CHECK-LABEL: smdrs: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smdrs a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.smdrs.i64.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.smdrs.i64.i64(i64, i64) + +define i64 
@v_smdrs(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_smdrs: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smdrs a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <2 x i32> @llvm.riscv.smdrs.v2i32.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <2 x i32> %2 to i64 + ret i64 %3 +} + +declare <2 x i32> @llvm.riscv.smdrs.v2i32.v4i16(<4 x i16>, <4 x i16>) + +define i64 @smxds(i64 %a, i64 %b) { +; CHECK-LABEL: smxds: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smxds a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.smxds.i64.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.smxds.i64.i64(i64, i64) + +define i64 @v_smxds(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_smxds: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smxds a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <2 x i32> @llvm.riscv.smxds.v2i32.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <2 x i32> %2 to i64 + ret i64 %3 +} + +declare <2 x i32> @llvm.riscv.smxds.v2i32.v4i16(<4 x i16>, <4 x i16>) + +define i64 @smin8(i64 %a, i64 %b) { +; CHECK-LABEL: smin8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smin8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.smin8.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.smin8.i64(i64, i64) + +define i64 @v_smin8(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_smin8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smin8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <8 x i8> + %1 = bitcast i64 %b.coerce to <8 x i8> + %2 = tail call <8 x i8> @llvm.riscv.smin8.v8i8(<8 x i8> %0, <8 x i8> %1) + %3 = bitcast <8 x i8> %2 to i64 + ret i64 %3 +} + +declare <8 x i8> @llvm.riscv.smin8.v8i8(<8 x i8>, <8 x i8>) + +define i64 @smin16(i64 %a, i64 %b) { +; CHECK-LABEL: smin16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smin16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.smin16.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.smin16.i64(i64, i64) + +define i64 @v_smin16(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_smin16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smin16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <4 x i16> @llvm.riscv.smin16.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <4 x i16> %2 to i64 + ret i64 %3 +} + +declare <4 x i16> @llvm.riscv.smin16.v4i16(<4 x i16>, <4 x i16>) + +define i64 @smmul(i64 %a, i64 %b) { +; CHECK-LABEL: smmul: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smmul a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.smmul.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.smmul.i64(i64, i64) + +define i64 @v_smmul(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_smmul: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smmul a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <2 x i32> + %1 = bitcast i64 %b.coerce to <2 x i32> + %2 = tail call <2 x i32> @llvm.riscv.smmul.v2i32(<2 x i32> %0, <2 x i32> %1) + %3 = bitcast <2 x i32> %2 to i64 + ret i64 %3 +} + +declare <2 x i32> @llvm.riscv.smmul.v2i32(<2 x i32>, <2 x i32>) + +define i64 @smmul_u(i64 %a, i64 %b) { +; CHECK-LABEL: smmul_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smmul.u a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.smmul.u.i64(i64 %a, i64 
%b) + ret i64 %0 +} + +declare i64 @llvm.riscv.smmul.u.i64(i64, i64) + +define i64 @v_smmul_u(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_smmul_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smmul.u a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <2 x i32> + %1 = bitcast i64 %b.coerce to <2 x i32> + %2 = tail call <2 x i32> @llvm.riscv.smmul.u.v2i32(<2 x i32> %0, <2 x i32> %1) + %3 = bitcast <2 x i32> %2 to i64 + ret i64 %3 +} + +declare <2 x i32> @llvm.riscv.smmul.u.v2i32(<2 x i32>, <2 x i32>) + +define i64 @smmwb(i64 %a, i64 %b) { +; CHECK-LABEL: smmwb: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smmwb a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.smmwb.i64.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.smmwb.i64.i64(i64, i64) + +define i64 @v_smmwb(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_smmwb: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smmwb a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <2 x i32> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <2 x i32> @llvm.riscv.smmwb.v2i32.v4i16(<2 x i32> %0, <4 x i16> %1) + %3 = bitcast <2 x i32> %2 to i64 + ret i64 %3 +} + +declare <2 x i32> @llvm.riscv.smmwb.v2i32.v4i16(<2 x i32>, <4 x i16>) + +define i64 @smmwb_u(i64 %a, i64 %b) { +; CHECK-LABEL: smmwb_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smmwb.u a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.smmwb.u.i64.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.smmwb.u.i64.i64(i64, i64) + +define i64 @v_smmwb_u(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_smmwb_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smmwb.u a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <2 x i32> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <2 x i32> @llvm.riscv.smmwb.u.v2i32.v4i16(<2 x i32> %0, <4 x i16> %1) + %3 = bitcast <2 x i32> %2 to i64 + ret i64 %3 +} + +declare <2 x i32> @llvm.riscv.smmwb.u.v2i32.v4i16(<2 x i32>, <4 x i16>) + +define i64 @smmwt(i64 %a, i64 %b) { +; CHECK-LABEL: smmwt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smmwt a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.smmwt.i64.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.smmwt.i64.i64(i64, i64) + +define i64 @v_smmwt(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_smmwt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smmwt a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <2 x i32> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <2 x i32> @llvm.riscv.smmwt.v2i32.v4i16(<2 x i32> %0, <4 x i16> %1) + %3 = bitcast <2 x i32> %2 to i64 + ret i64 %3 +} + +declare <2 x i32> @llvm.riscv.smmwt.v2i32.v4i16(<2 x i32>, <4 x i16>) + +define i64 @smmwt_u(i64 %a, i64 %b) { +; CHECK-LABEL: smmwt_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smmwt.u a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.smmwt.u.i64.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.smmwt.u.i64.i64(i64, i64) + +define i64 @v_smmwt_u(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_smmwt_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: smmwt.u a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <2 x i32> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <2 x i32> @llvm.riscv.smmwt.u.v2i32.v4i16(<2 x i32> %0, <4 x i16> %1) + %3 = bitcast <2 x i32> %2 to i64 + ret i64 %3 +} + +declare <2 x i32> @llvm.riscv.smmwt.u.v2i32.v4i16(<2 x i32>, <4 x i16>) + +define i64 @sra_u(i64 %a, 
i32 signext %b) { +; CHECK-LABEL: sra_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a1, a1, 32 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: sra.u a0, a0, a1 +; CHECK-NEXT: ret +entry: + %conv = zext i32 %b to i64 + %0 = tail call i64 @llvm.riscv.sra.u.i64.i64(i64 %a, i64 %conv) + ret i64 %0 +} + +declare i64 @llvm.riscv.sra.u.i64.i64(i64, i64) + +define i64 @sra8(i64 %a, i32 signext %b) { +; CHECK-LABEL: sra8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a1, a1, 32 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: sra8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %conv = zext i32 %b to i64 + %0 = tail call i64 @llvm.riscv.sra8.i64.i64(i64 %a, i64 %conv) + ret i64 %0 +} + +declare i64 @llvm.riscv.sra8.i64.i64(i64, i64) + +define i64 @v_sra8(i64 %a.coerce, i32 signext %b) { +; CHECK-LABEL: v_sra8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a1, a1, 32 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: sra8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <8 x i8> + %conv = zext i32 %b to i64 + %1 = tail call <8 x i8> @llvm.riscv.sra8.v8i8.i64(<8 x i8> %0, i64 %conv) + %2 = bitcast <8 x i8> %1 to i64 + ret i64 %2 +} + +declare <8 x i8> @llvm.riscv.sra8.v8i8.i64(<8 x i8>, i64) + +define i64 @sra8_u(i64 %a, i32 signext %b) { +; CHECK-LABEL: sra8_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a1, a1, 32 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: sra8.u a0, a0, a1 +; CHECK-NEXT: ret +entry: + %conv = zext i32 %b to i64 + %0 = tail call i64 @llvm.riscv.sra8.u.i64.i64(i64 %a, i64 %conv) + ret i64 %0 +} + +declare i64 @llvm.riscv.sra8.u.i64.i64(i64, i64) + +define i64 @v_sra8_u(i64 %a.coerce, i32 signext %b) { +; CHECK-LABEL: v_sra8_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a1, a1, 32 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: sra8.u a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <8 x i8> + %conv = zext i32 %b to i64 + %1 = tail call <8 x i8> @llvm.riscv.sra8.u.v8i8.i64(<8 x i8> %0, i64 %conv) + %2 = bitcast <8 x i8> %1 to i64 + ret i64 %2 +} + +declare <8 x i8> @llvm.riscv.sra8.u.v8i8.i64(<8 x i8>, i64) + +define i64 @sra16(i64 %a, i32 signext %b) { +; CHECK-LABEL: sra16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a1, a1, 32 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: sra16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %conv = zext i32 %b to i64 + %0 = tail call i64 @llvm.riscv.sra16.i64.i64(i64 %a, i64 %conv) + ret i64 %0 +} + +declare i64 @llvm.riscv.sra16.i64.i64(i64, i64) + +define i64 @v_sra16(i64 %a.coerce, i32 signext %b) { +; CHECK-LABEL: v_sra16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a1, a1, 32 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: sra16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %conv = zext i32 %b to i64 + %1 = tail call <4 x i16> @llvm.riscv.sra16.v4i16.i64(<4 x i16> %0, i64 %conv) + %2 = bitcast <4 x i16> %1 to i64 + ret i64 %2 +} + +declare <4 x i16> @llvm.riscv.sra16.v4i16.i64(<4 x i16>, i64) + +define i64 @sra16_u(i64 %a, i32 signext %b) { +; CHECK-LABEL: sra16_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a1, a1, 32 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: sra16.u a0, a0, a1 +; CHECK-NEXT: ret +entry: + %conv = zext i32 %b to i64 + %0 = tail call i64 @llvm.riscv.sra16.u.i64.i64(i64 %a, i64 %conv) + ret i64 %0 +} + +declare i64 @llvm.riscv.sra16.u.i64.i64(i64, i64) + +define i64 @v_sra16_u(i64 %a.coerce, i32 signext %b) { +; CHECK-LABEL: v_sra16_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a1, a1, 32 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: 
sra16.u a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %conv = zext i32 %b to i64 + %1 = tail call <4 x i16> @llvm.riscv.sra16.u.v4i16.i64(<4 x i16> %0, i64 %conv) + %2 = bitcast <4 x i16> %1 to i64 + ret i64 %2 +} + +declare <4 x i16> @llvm.riscv.sra16.u.v4i16.i64(<4 x i16>, i64) + +define i64 @srl8(i64 %a, i32 signext %b) { +; CHECK-LABEL: srl8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a1, a1, 32 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: srl8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %conv = zext i32 %b to i64 + %0 = tail call i64 @llvm.riscv.srl8.i64.i64(i64 %a, i64 %conv) + ret i64 %0 +} + +declare i64 @llvm.riscv.srl8.i64.i64(i64, i64) + +define i64 @v_srl8(i64 %a.coerce, i32 signext %b) { +; CHECK-LABEL: v_srl8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a1, a1, 32 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: srl8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <8 x i8> + %conv = zext i32 %b to i64 + %1 = tail call <8 x i8> @llvm.riscv.srl8.v8i8.i64(<8 x i8> %0, i64 %conv) + %2 = bitcast <8 x i8> %1 to i64 + ret i64 %2 +} + +declare <8 x i8> @llvm.riscv.srl8.v8i8.i64(<8 x i8>, i64) + +define i64 @srl8_u(i64 %a, i32 signext %b) { +; CHECK-LABEL: srl8_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a1, a1, 32 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: srl8.u a0, a0, a1 +; CHECK-NEXT: ret +entry: + %conv = zext i32 %b to i64 + %0 = tail call i64 @llvm.riscv.srl8.u.i64.i64(i64 %a, i64 %conv) + ret i64 %0 +} + +declare i64 @llvm.riscv.srl8.u.i64.i64(i64, i64) + +define i64 @v_srl8_u(i64 %a.coerce, i32 signext %b) { +; CHECK-LABEL: v_srl8_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a1, a1, 32 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: srl8.u a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <8 x i8> + %conv = zext i32 %b to i64 + %1 = tail call <8 x i8> @llvm.riscv.srl8.u.v8i8.i64(<8 x i8> %0, i64 %conv) + %2 = bitcast <8 x i8> %1 to i64 + ret i64 %2 +} + +declare <8 x i8> @llvm.riscv.srl8.u.v8i8.i64(<8 x i8>, i64) + +define i64 @srl16(i64 %a, i32 signext %b) { +; CHECK-LABEL: srl16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a1, a1, 32 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: srl16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %conv = zext i32 %b to i64 + %0 = tail call i64 @llvm.riscv.srl16.i64.i64(i64 %a, i64 %conv) + ret i64 %0 +} + +declare i64 @llvm.riscv.srl16.i64.i64(i64, i64) + +define i64 @v_srl16(i64 %a.coerce, i32 signext %b) { +; CHECK-LABEL: v_srl16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a1, a1, 32 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: srl16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %conv = zext i32 %b to i64 + %1 = tail call <4 x i16> @llvm.riscv.srl16.v4i16.i64(<4 x i16> %0, i64 %conv) + %2 = bitcast <4 x i16> %1 to i64 + ret i64 %2 +} + +declare <4 x i16> @llvm.riscv.srl16.v4i16.i64(<4 x i16>, i64) + +define i64 @srl16_u(i64 %a, i32 signext %b) { +; CHECK-LABEL: srl16_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a1, a1, 32 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: srl16.u a0, a0, a1 +; CHECK-NEXT: ret +entry: + %conv = zext i32 %b to i64 + %0 = tail call i64 @llvm.riscv.srl16.u.i64.i64(i64 %a, i64 %conv) + ret i64 %0 +} + +declare i64 @llvm.riscv.srl16.u.i64.i64(i64, i64) + +define i64 @v_srl16_u(i64 %a.coerce, i32 signext %b) { +; CHECK-LABEL: v_srl16_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a1, a1, 32 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: srl16.u a0, a0, a1 +; CHECK-NEXT: ret 
+entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %conv = zext i32 %b to i64 + %1 = tail call <4 x i16> @llvm.riscv.srl16.u.v4i16.i64(<4 x i16> %0, i64 %conv) + %2 = bitcast <4 x i16> %1 to i64 + ret i64 %2 +} + +declare <4 x i16> @llvm.riscv.srl16.u.v4i16.i64(<4 x i16>, i64) + +define i64 @stas16(i64 %a, i64 %b) { +; CHECK-LABEL: stas16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: stas16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.stas16.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.stas16.i64(i64, i64) + +define i64 @v_ustas16(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_ustas16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: stas16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <4 x i16> @llvm.riscv.stas16.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <4 x i16> %2 to i64 + ret i64 %3 +} + +declare <4 x i16> @llvm.riscv.stas16.v4i16(<4 x i16>, <4 x i16>) + +define i64 @v_sstas16(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_sstas16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: stas16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <4 x i16> @llvm.riscv.stas16.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <4 x i16> %2 to i64 + ret i64 %3 +} + +define i64 @stsa16(i64 %a, i64 %b) { +; CHECK-LABEL: stsa16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: stsa16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.stsa16.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.stsa16.i64(i64, i64) + +define i64 @v_ustsa16(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_ustsa16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: stsa16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <4 x i16> @llvm.riscv.stsa16.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <4 x i16> %2 to i64 + ret i64 %3 +} + +declare <4 x i16> @llvm.riscv.stsa16.v4i16(<4 x i16>, <4 x i16>) + +define i64 @v_sstsa16(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_sstsa16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: stsa16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <4 x i16> @llvm.riscv.stsa16.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <4 x i16> %2 to i64 + ret i64 %3 +} + +define i64 @sub8(i64 %a, i64 %b) { +; CHECK-LABEL: sub8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sub8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.sub8.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.sub8.i64(i64, i64) + +define i64 @v_usub8(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_usub8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sub8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <8 x i8> + %1 = bitcast i64 %b.coerce to <8 x i8> + %2 = tail call <8 x i8> @llvm.riscv.sub8.v8i8(<8 x i8> %0, <8 x i8> %1) + %3 = bitcast <8 x i8> %2 to i64 + ret i64 %3 +} + +declare <8 x i8> @llvm.riscv.sub8.v8i8(<8 x i8>, <8 x i8>) + +define i64 @v_ssub8(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_ssub8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sub8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <8 x i8> + %1 = bitcast i64 %b.coerce to <8 x i8> + %2 = tail call <8 x i8> @llvm.riscv.sub8.v8i8(<8 x i8> %0, <8 x i8> %1) + %3 = bitcast <8 
x i8> %2 to i64 + ret i64 %3 +} + +define i64 @sub16(i64 %a, i64 %b) { +; CHECK-LABEL: sub16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sub16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.sub16.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.sub16.i64(i64, i64) + +define i64 @v_usub16(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_usub16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sub16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <4 x i16> @llvm.riscv.sub16.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <4 x i16> %2 to i64 + ret i64 %3 +} + +declare <4 x i16> @llvm.riscv.sub16.v4i16(<4 x i16>, <4 x i16>) + +define i64 @v_ssub16(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_ssub16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sub16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <4 x i16> @llvm.riscv.sub16.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <4 x i16> %2 to i64 + ret i64 %3 +} + +define i64 @sunpkd810(i64 %a) { +; CHECK-LABEL: sunpkd810: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sunpkd810 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.sunpkd810.i64(i64 %a) + ret i64 %0 +} + +declare i64 @llvm.riscv.sunpkd810.i64(i64) + +define i64 @v_sunpkd810(i64 %a.coerce) { +; CHECK-LABEL: v_sunpkd810: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sunpkd810 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <8 x i8> + %1 = tail call <4 x i16> @llvm.riscv.v.sunpkd810.v4i16(<8 x i8> %0) + %2 = bitcast <4 x i16> %1 to i64 + ret i64 %2 +} + +declare <4 x i16> @llvm.riscv.v.sunpkd810.v4i16(<8 x i8>) + +define i64 @sunpkd820(i64 %a) { +; CHECK-LABEL: sunpkd820: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sunpkd820 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.sunpkd820.i64(i64 %a) + ret i64 %0 +} + +declare i64 @llvm.riscv.sunpkd820.i64(i64) + +define i64 @v_sunpkd820(i64 %a.coerce) { +; CHECK-LABEL: v_sunpkd820: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sunpkd820 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <8 x i8> + %1 = tail call <4 x i16> @llvm.riscv.v.sunpkd820.v4i16(<8 x i8> %0) + %2 = bitcast <4 x i16> %1 to i64 + ret i64 %2 +} + +declare <4 x i16> @llvm.riscv.v.sunpkd820.v4i16(<8 x i8>) + +define i64 @sunpkd830(i64 %a) { +; CHECK-LABEL: sunpkd830: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sunpkd830 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.sunpkd830.i64(i64 %a) + ret i64 %0 +} + +declare i64 @llvm.riscv.sunpkd830.i64(i64) + +define i64 @v_sunpkd830(i64 %a.coerce) { +; CHECK-LABEL: v_sunpkd830: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sunpkd830 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <8 x i8> + %1 = tail call <4 x i16> @llvm.riscv.v.sunpkd830.v4i16(<8 x i8> %0) + %2 = bitcast <4 x i16> %1 to i64 + ret i64 %2 +} + +declare <4 x i16> @llvm.riscv.v.sunpkd830.v4i16(<8 x i8>) + +define i64 @sunpkd831(i64 %a) { +; CHECK-LABEL: sunpkd831: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sunpkd831 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.sunpkd831.i64(i64 %a) + ret i64 %0 +} + +declare i64 @llvm.riscv.sunpkd831.i64(i64) + +define i64 @v_sunpkd831(i64 %a.coerce) { +; CHECK-LABEL: v_sunpkd831: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sunpkd831 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <8 x i8> + %1 
= tail call <4 x i16> @llvm.riscv.v.sunpkd831.v4i16(<8 x i8> %0) + %2 = bitcast <4 x i16> %1 to i64 + ret i64 %2 +} + +declare <4 x i16> @llvm.riscv.v.sunpkd831.v4i16(<8 x i8>) + +define i64 @sunpkd832(i64 %a) { +; CHECK-LABEL: sunpkd832: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sunpkd832 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.sunpkd832.i64(i64 %a) + ret i64 %0 +} + +declare i64 @llvm.riscv.sunpkd832.i64(i64) + +define i64 @v_sunpkd832(i64 %a.coerce) { +; CHECK-LABEL: v_sunpkd832: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sunpkd832 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <8 x i8> + %1 = tail call <4 x i16> @llvm.riscv.v.sunpkd832.v4i16(<8 x i8> %0) + %2 = bitcast <4 x i16> %1 to i64 + ret i64 %2 +} + +declare <4 x i16> @llvm.riscv.v.sunpkd832.v4i16(<8 x i8>) + +define i64 @swap8(i64 %a) { +; CHECK-LABEL: swap8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: swap8 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.swap8.i64(i64 %a) + ret i64 %0 +} + +declare i64 @llvm.riscv.swap8.i64(i64) + +define i64 @v_swap8(i64 %a.coerce) { +; CHECK-LABEL: v_swap8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: swap8 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <8 x i8> + %1 = tail call <8 x i8> @llvm.riscv.swap8.v8i8(<8 x i8> %0) + %2 = bitcast <8 x i8> %1 to i64 + ret i64 %2 +} + +declare <8 x i8> @llvm.riscv.swap8.v8i8(<8 x i8>) + +define i64 @swap16(i64 %a) { +; CHECK-LABEL: swap16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: swap16 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.swap16.i64(i64 %a) + ret i64 %0 +} + +declare i64 @llvm.riscv.swap16.i64(i64) + +define i64 @v_swap16(i64 %a.coerce) { +; CHECK-LABEL: v_swap16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: swap16 a0, a0 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = tail call <4 x i16> @llvm.riscv.swap16.v4i16(<4 x i16> %0) + %2 = bitcast <4 x i16> %1 to i64 + ret i64 %2 +} + +declare <4 x i16> @llvm.riscv.swap16.v4i16(<4 x i16>) + +define i64 @uclip8(i64 %a) { +; CHECK-LABEL: uclip8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: uclip8 a0, a0, 7 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.uclip8.i64.i64(i64 %a, i64 7) + ret i64 %0 +} + +declare i64 @llvm.riscv.uclip8.i64.i64(i64, i64) + +define i64 @v_uclip8(i64 %a.coerce) { +; CHECK-LABEL: v_uclip8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: uclip8 a0, a0, 7 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <8 x i8> + %1 = tail call <8 x i8> @llvm.riscv.uclip8.v8i8.i64(<8 x i8> %0, i64 7) + %2 = bitcast <8 x i8> %1 to i64 + ret i64 %2 +} + +declare <8 x i8> @llvm.riscv.uclip8.v8i8.i64(<8 x i8>, i64) + +define i64 @uclip16(i64 %a) { +; CHECK-LABEL: uclip16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: uclip16 a0, a0, 8 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.uclip16.i64.i64(i64 %a, i64 8) + ret i64 %0 +} + +declare i64 @llvm.riscv.uclip16.i64.i64(i64, i64) + +define i64 @v_uclip16(i64 %a.coerce) { +; CHECK-LABEL: v_uclip16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: uclip16 a0, a0, 8 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = tail call <4 x i16> @llvm.riscv.uclip16.v4i16.i64(<4 x i16> %0, i64 8) + %2 = bitcast <4 x i16> %1 to i64 + ret i64 %2 +} + +declare <4 x i16> @llvm.riscv.uclip16.v4i16.i64(<4 x i16>, i64) + +define i64 @uclip32(i64 %a) { +; CHECK-LABEL: uclip32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: uclip32 a0, a0, 9 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 
@llvm.riscv.uclip32.i64.i64(i64 %a, i64 9) + ret i64 %0 +} + +declare i64 @llvm.riscv.uclip32.i64.i64(i64, i64) + +define i64 @v_uclip32(i64 %a.coerce) { +; CHECK-LABEL: v_uclip32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi a1, zero, 9 +; CHECK-NEXT: uclip32 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <2 x i32> + %1 = tail call <2 x i32> @llvm.riscv.uclip32.v2i32.i64(<2 x i32> %0, i64 9) + %2 = bitcast <2 x i32> %1 to i64 + ret i64 %2 +} + +declare <2 x i32> @llvm.riscv.uclip32.v2i32.i64(<2 x i32>, i64) + +define i64 @ucmple8(i64 %a, i64 %b) { +; CHECK-LABEL: ucmple8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ucmple8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.ucmple8.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.ucmple8.i64(i64, i64) + +define i64 @v_ucmple8(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_ucmple8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ucmple8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <8 x i8> + %1 = bitcast i64 %b.coerce to <8 x i8> + %2 = tail call <8 x i8> @llvm.riscv.ucmple8.v8i8(<8 x i8> %0, <8 x i8> %1) + %3 = bitcast <8 x i8> %2 to i64 + ret i64 %3 +} + +declare <8 x i8> @llvm.riscv.ucmple8.v8i8(<8 x i8>, <8 x i8>) + +define i64 @ucmple16(i64 %a, i64 %b) { +; CHECK-LABEL: ucmple16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ucmple16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.ucmple16.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.ucmple16.i64(i64, i64) + +define i64 @v_ucmple16(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_ucmple16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ucmple16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <4 x i16> @llvm.riscv.ucmple16.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <4 x i16> %2 to i64 + ret i64 %3 +} + +declare <4 x i16> @llvm.riscv.ucmple16.v4i16(<4 x i16>, <4 x i16>) + +define i64 @ucmplt8(i64 %a, i64 %b) { +; CHECK-LABEL: ucmplt8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ucmplt8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.ucmplt8.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.ucmplt8.i64(i64, i64) + +define i64 @v_ucmplt8(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_ucmplt8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ucmplt8 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <8 x i8> + %1 = bitcast i64 %b.coerce to <8 x i8> + %2 = tail call <8 x i8> @llvm.riscv.ucmplt8.v8i8(<8 x i8> %0, <8 x i8> %1) + %3 = bitcast <8 x i8> %2 to i64 + ret i64 %3 +} + +declare <8 x i8> @llvm.riscv.ucmplt8.v8i8(<8 x i8>, <8 x i8>) + +define i64 @ucmplt16(i64 %a, i64 %b) { +; CHECK-LABEL: ucmplt16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ucmplt16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.ucmplt16.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i64 @llvm.riscv.ucmplt16.i64(i64, i64) + +define i64 @v_ucmplt16(i64 %a.coerce, i64 %b.coerce) { +; CHECK-LABEL: v_ucmplt16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ucmplt16 a0, a0, a1 +; CHECK-NEXT: ret +entry: + %0 = bitcast i64 %a.coerce to <4 x i16> + %1 = bitcast i64 %b.coerce to <4 x i16> + %2 = tail call <4 x i16> @llvm.riscv.ucmplt16.v4i16(<4 x i16> %0, <4 x i16> %1) + %3 = bitcast <4 x i16> %2 to i64 + ret i64 %3 +} + +declare <4 x i16> @llvm.riscv.ucmplt16.v4i16(<4 x i16>, <4 x i16>) + +define i64 @ukadd8(i64 %a, i64 %b) { +; CHECK-LABEL: 
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ukadd8 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.riscv.ukadd8.i64(i64 %a, i64 %b)
+  ret i64 %0
+}
+
+declare i64 @llvm.riscv.ukadd8.i64(i64, i64)
+
+define i64 @v_ukadd8(i64 %a.coerce, i64 %b.coerce) {
+; CHECK-LABEL: v_ukadd8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ukadd8 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = bitcast i64 %a.coerce to <8 x i8>
+  %1 = bitcast i64 %b.coerce to <8 x i8>
+  %2 = tail call <8 x i8> @llvm.riscv.ukadd8.v8i8(<8 x i8> %0, <8 x i8> %1)
+  %3 = bitcast <8 x i8> %2 to i64
+  ret i64 %3
+}
+
+declare <8 x i8> @llvm.riscv.ukadd8.v8i8(<8 x i8>, <8 x i8>)
+
+define i64 @ukadd16(i64 %a, i64 %b) {
+; CHECK-LABEL: ukadd16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ukadd16 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.riscv.ukadd16.i64(i64 %a, i64 %b)
+  ret i64 %0
+}
+
+declare i64 @llvm.riscv.ukadd16.i64(i64, i64)
+
+define i64 @v_ukadd16(i64 %a.coerce, i64 %b.coerce) {
+; CHECK-LABEL: v_ukadd16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ukadd16 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = bitcast i64 %a.coerce to <4 x i16>
+  %1 = bitcast i64 %b.coerce to <4 x i16>
+  %2 = tail call <4 x i16> @llvm.riscv.ukadd16.v4i16(<4 x i16> %0, <4 x i16> %1)
+  %3 = bitcast <4 x i16> %2 to i64
+  ret i64 %3
+}
+
+declare <4 x i16> @llvm.riscv.ukadd16.v4i16(<4 x i16>, <4 x i16>)
+
+define i64 @ukaddh(i32 signext %a, i32 signext %b) {
+; CHECK-LABEL: ukaddh:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ukaddh a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %conv = sext i32 %a to i64
+  %conv1 = sext i32 %b to i64
+  %0 = tail call i64 @llvm.riscv.ukaddh.i64(i64 %conv, i64 %conv1)
+  ret i64 %0
+}
+
+declare i64 @llvm.riscv.ukaddh.i64(i64, i64)
+
+define i64 @ukaddw(i32 signext %a, i32 signext %b) {
+; CHECK-LABEL: ukaddw:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ukaddw a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %conv = sext i32 %a to i64
+  %conv1 = sext i32 %b to i64
+  %0 = tail call i64 @llvm.riscv.ukaddw.i64(i64 %conv, i64 %conv1)
+  ret i64 %0
+}
+
+declare i64 @llvm.riscv.ukaddw.i64(i64, i64)
+
+define i64 @ukcras16(i64 %a, i64 %b) {
+; CHECK-LABEL: ukcras16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ukcras16 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.riscv.ukcras16.i64(i64 %a, i64 %b)
+  ret i64 %0
+}
+
+declare i64 @llvm.riscv.ukcras16.i64(i64, i64)
+
+define i64 @v_ukcras16(i64 %a.coerce, i64 %b.coerce) {
+; CHECK-LABEL: v_ukcras16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ukcras16 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = bitcast i64 %a.coerce to <4 x i16>
+  %1 = bitcast i64 %b.coerce to <4 x i16>
+  %2 = tail call <4 x i16> @llvm.riscv.ukcras16.v4i16(<4 x i16> %0, <4 x i16> %1)
+  %3 = bitcast <4 x i16> %2 to i64
+  ret i64 %3
+}
+
+declare <4 x i16> @llvm.riscv.ukcras16.v4i16(<4 x i16>, <4 x i16>)
+
+define i64 @ukcrsa16(i64 %a, i64 %b) {
+; CHECK-LABEL: ukcrsa16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ukcrsa16 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.riscv.ukcrsa16.i64(i64 %a, i64 %b)
+  ret i64 %0
+}
+
+declare i64 @llvm.riscv.ukcrsa16.i64(i64, i64)
+
+define i64 @v_ukcrsa16(i64 %a.coerce, i64 %b.coerce) {
+; CHECK-LABEL: v_ukcrsa16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ukcrsa16 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = bitcast i64 %a.coerce to <4 x i16>
+  %1 = bitcast i64 %b.coerce to <4 x i16>
+  %2 = tail call <4 x i16> @llvm.riscv.ukcrsa16.v4i16(<4 x i16> %0, <4 x i16> %1)
+  %3 = bitcast <4 x i16> %2 to i64
+  ret i64 %3
+}
+
+declare <4 x i16> @llvm.riscv.ukcrsa16.v4i16(<4 x i16>, <4 x i16>)
+
+define i64 @ukstas16(i64 %a, i64 %b) {
+; CHECK-LABEL: ukstas16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ukstas16 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.riscv.ukstas16.i64(i64 %a, i64 %b)
+  ret i64 %0
+}
+
+declare i64 @llvm.riscv.ukstas16.i64(i64, i64)
+
+define i64 @v_ukstas16(i64 %a.coerce, i64 %b.coerce) {
+; CHECK-LABEL: v_ukstas16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ukstas16 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = bitcast i64 %a.coerce to <4 x i16>
+  %1 = bitcast i64 %b.coerce to <4 x i16>
+  %2 = tail call <4 x i16> @llvm.riscv.ukstas16.v4i16(<4 x i16> %0, <4 x i16> %1)
+  %3 = bitcast <4 x i16> %2 to i64
+  ret i64 %3
+}
+
+declare <4 x i16> @llvm.riscv.ukstas16.v4i16(<4 x i16>, <4 x i16>)
+
+define i64 @ukstsa16(i64 %a, i64 %b) {
+; CHECK-LABEL: ukstsa16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ukstsa16 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.riscv.ukstsa16.i64(i64 %a, i64 %b)
+  ret i64 %0
+}
+
+declare i64 @llvm.riscv.ukstsa16.i64(i64, i64)
+
+define i64 @v_ukstsa16(i64 %a.coerce, i64 %b.coerce) {
+; CHECK-LABEL: v_ukstsa16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ukstsa16 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = bitcast i64 %a.coerce to <4 x i16>
+  %1 = bitcast i64 %b.coerce to <4 x i16>
+  %2 = tail call <4 x i16> @llvm.riscv.ukstsa16.v4i16(<4 x i16> %0, <4 x i16> %1)
+  %3 = bitcast <4 x i16> %2 to i64
+  ret i64 %3
+}
+
+declare <4 x i16> @llvm.riscv.ukstsa16.v4i16(<4 x i16>, <4 x i16>)
+
+define i64 @uksub8(i64 %a, i64 %b) {
+; CHECK-LABEL: uksub8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    uksub8 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.riscv.uksub8.i64(i64 %a, i64 %b)
+  ret i64 %0
+}
+
+declare i64 @llvm.riscv.uksub8.i64(i64, i64)
+
+define i64 @v_uksub8(i64 %a.coerce, i64 %b.coerce) {
+; CHECK-LABEL: v_uksub8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    uksub8 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = bitcast i64 %a.coerce to <8 x i8>
+  %1 = bitcast i64 %b.coerce to <8 x i8>
+  %2 = tail call <8 x i8> @llvm.riscv.uksub8.v8i8(<8 x i8> %0, <8 x i8> %1)
+  %3 = bitcast <8 x i8> %2 to i64
+  ret i64 %3
+}
+
+declare <8 x i8> @llvm.riscv.uksub8.v8i8(<8 x i8>, <8 x i8>)
+
+define i64 @uksub16(i64 %a, i64 %b) {
+; CHECK-LABEL: uksub16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    uksub16 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.riscv.uksub16.i64(i64 %a, i64 %b)
+  ret i64 %0
+}
+
+declare i64 @llvm.riscv.uksub16.i64(i64, i64)
+
+define i64 @v_uksub16(i64 %a.coerce, i64 %b.coerce) {
+; CHECK-LABEL: v_uksub16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    uksub16 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = bitcast i64 %a.coerce to <4 x i16>
+  %1 = bitcast i64 %b.coerce to <4 x i16>
+  %2 = tail call <4 x i16> @llvm.riscv.uksub16.v4i16(<4 x i16> %0, <4 x i16> %1)
+  %3 = bitcast <4 x i16> %2 to i64
+  ret i64 %3
+}
+
+declare <4 x i16> @llvm.riscv.uksub16.v4i16(<4 x i16>, <4 x i16>)
+
+define i64 @uksubh(i32 signext %a, i32 signext %b) {
+; CHECK-LABEL: uksubh:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
+; CHECK-NEXT:    slli a1, a1, 32
+; CHECK-NEXT:    srli a1, a1, 32
+; CHECK-NEXT:    uksubh a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %conv = zext i32 %a to i64
+  %conv1 = zext i32 %b to i64
+  %0 = tail call i64 @llvm.riscv.uksubh.i64(i64 %conv, i64 %conv1)
+  ret i64 %0
+}
+
+declare i64 @llvm.riscv.uksubh.i64(i64, i64)
+
+define i64 @uksubw(i32 signext %a, i32 signext %b) {
+; CHECK-LABEL: uksubw:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
+; CHECK-NEXT:    slli a1, a1, 32
+; CHECK-NEXT:    srli a1, a1, 32
+; CHECK-NEXT:    uksubw a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %conv = zext i32 %a to i64
+  %conv1 = zext i32 %b to i64
+  %0 = tail call i64 @llvm.riscv.uksubw.i64(i64 %conv, i64 %conv1)
+  ret i64 %0
+}
+
+declare i64 @llvm.riscv.uksubw.i64(i64, i64)
+
+define i64 @umaqa(i64 %t, i64 %a, i64 %b) {
+; CHECK-LABEL: umaqa:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    umaqa a0, a1, a2
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.riscv.umaqa.i64.i64(i64 %t, i64 %a, i64 %b)
+  ret i64 %0
+}
+
+declare i64 @llvm.riscv.umaqa.i64.i64(i64, i64, i64)
+
+define i64 @v_umaqa(i64 %t.coerce, i64 %a.coerce, i64 %b.coerce) {
+; CHECK-LABEL: v_umaqa:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    umaqa a0, a1, a2
+; CHECK-NEXT:    ret
+entry:
+  %0 = bitcast i64 %t.coerce to <2 x i32>
+  %1 = bitcast i64 %a.coerce to <8 x i8>
+  %2 = bitcast i64 %b.coerce to <8 x i8>
+  %3 = tail call <2 x i32> @llvm.riscv.umaqa.v2i32.v8i8(<2 x i32> %0, <8 x i8> %1, <8 x i8> %2)
+  %4 = bitcast <2 x i32> %3 to i64
+  ret i64 %4
+}
+
+declare <2 x i32> @llvm.riscv.umaqa.v2i32.v8i8(<2 x i32>, <8 x i8>, <8 x i8>)
+
+define i64 @umax8(i64 %a, i64 %b) {
+; CHECK-LABEL: umax8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    umax8 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.riscv.umax8.i64(i64 %a, i64 %b)
+  ret i64 %0
+}
+
+declare i64 @llvm.riscv.umax8.i64(i64, i64)
+
+define i64 @v_umax8(i64 %a.coerce, i64 %b.coerce) {
+; CHECK-LABEL: v_umax8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    umax8 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = bitcast i64 %a.coerce to <8 x i8>
+  %1 = bitcast i64 %b.coerce to <8 x i8>
+  %2 = tail call <8 x i8> @llvm.riscv.umax8.v8i8(<8 x i8> %0, <8 x i8> %1)
+  %3 = bitcast <8 x i8> %2 to i64
+  ret i64 %3
+}
+
+declare <8 x i8> @llvm.riscv.umax8.v8i8(<8 x i8>, <8 x i8>)
+
+define i64 @umax16(i64 %a, i64 %b) {
+; CHECK-LABEL: umax16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    umax16 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.riscv.umax16.i64(i64 %a, i64 %b)
+  ret i64 %0
+}
+
+declare i64 @llvm.riscv.umax16.i64(i64, i64)
+
+define i64 @v_umax16(i64 %a.coerce, i64 %b.coerce) {
+; CHECK-LABEL: v_umax16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    umax16 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = bitcast i64 %a.coerce to <4 x i16>
+  %1 = bitcast i64 %b.coerce to <4 x i16>
+  %2 = tail call <4 x i16> @llvm.riscv.umax16.v4i16(<4 x i16> %0, <4 x i16> %1)
+  %3 = bitcast <4 x i16> %2 to i64
+  ret i64 %3
+}
+
+declare <4 x i16> @llvm.riscv.umax16.v4i16(<4 x i16>, <4 x i16>)
+
+define i64 @umin8(i64 %a, i64 %b) {
+; CHECK-LABEL: umin8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    umin8 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.riscv.umin8.i64(i64 %a, i64 %b)
+  ret i64 %0
+}
+
+declare i64 @llvm.riscv.umin8.i64(i64, i64)
+
+define i64 @v_umin8(i64 %a.coerce, i64 %b.coerce) {
+; CHECK-LABEL: v_umin8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    umin8 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = bitcast i64 %a.coerce to <8 x i8>
+  %1 = bitcast i64 %b.coerce to <8 x i8>
+  %2 = tail call <8 x i8> @llvm.riscv.umin8.v8i8(<8 x i8> %0, <8 x i8> %1)
+  %3 = bitcast <8 x i8> %2 to i64
+  ret i64 %3
+}
+
+declare <8 x i8> @llvm.riscv.umin8.v8i8(<8 x i8>, <8 x i8>)
+
+define i64 @umin16(i64 %a, i64 %b) {
+; CHECK-LABEL: umin16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    umin16 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.riscv.umin16.i64(i64 %a, i64 %b)
+  ret i64 %0
+}
+
+declare i64 @llvm.riscv.umin16.i64(i64, i64)
+
+define i64 @v_umin16(i64 %a.coerce, i64 %b.coerce) {
+; CHECK-LABEL: v_umin16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    umin16 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = bitcast i64 %a.coerce to <4 x i16>
+  %1 = bitcast i64 %b.coerce to <4 x i16>
+  %2 = tail call <4 x i16> @llvm.riscv.umin16.v4i16(<4 x i16> %0, <4 x i16> %1)
+  %3 = bitcast <4 x i16> %2 to i64
+  ret i64 %3
+}
+
+declare <4 x i16> @llvm.riscv.umin16.v4i16(<4 x i16>, <4 x i16>)
+
+define i64 @uradd8(i64 %a, i64 %b) {
+; CHECK-LABEL: uradd8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    uradd8 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.riscv.uradd8.i64(i64 %a, i64 %b)
+  ret i64 %0
+}
+
+declare i64 @llvm.riscv.uradd8.i64(i64, i64)
+
+define i64 @v_uradd8(i64 %a.coerce, i64 %b.coerce) {
+; CHECK-LABEL: v_uradd8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    uradd8 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = bitcast i64 %a.coerce to <8 x i8>
+  %1 = bitcast i64 %b.coerce to <8 x i8>
+  %2 = tail call <8 x i8> @llvm.riscv.uradd8.v8i8(<8 x i8> %0, <8 x i8> %1)
+  %3 = bitcast <8 x i8> %2 to i64
+  ret i64 %3
+}
+
+declare <8 x i8> @llvm.riscv.uradd8.v8i8(<8 x i8>, <8 x i8>)
+
+define i64 @uradd16(i64 %a, i64 %b) {
+; CHECK-LABEL: uradd16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    uradd16 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.riscv.uradd16.i64(i64 %a, i64 %b)
+  ret i64 %0
+}
+
+declare i64 @llvm.riscv.uradd16.i64(i64, i64)
+
+define i64 @v_uradd16(i64 %a.coerce, i64 %b.coerce) {
+; CHECK-LABEL: v_uradd16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    uradd16 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = bitcast i64 %a.coerce to <4 x i16>
+  %1 = bitcast i64 %b.coerce to <4 x i16>
+  %2 = tail call <4 x i16> @llvm.riscv.uradd16.v4i16(<4 x i16> %0, <4 x i16> %1)
+  %3 = bitcast <4 x i16> %2 to i64
+  ret i64 %3
+}
+
+declare <4 x i16> @llvm.riscv.uradd16.v4i16(<4 x i16>, <4 x i16>)
+
+define i64 @uraddw(i32 signext %a, i32 signext %b) {
+; CHECK-LABEL: uraddw:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
+; CHECK-NEXT:    slli a1, a1, 32
+; CHECK-NEXT:    srli a1, a1, 32
+; CHECK-NEXT:    uraddw a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %conv = zext i32 %a to i64
+  %conv1 = zext i32 %b to i64
+  %0 = tail call i64 @llvm.riscv.uraddw.i64(i64 %conv, i64 %conv1)
+  ret i64 %0
+}
+
+declare i64 @llvm.riscv.uraddw.i64(i64, i64)
+
+define i64 @urcras16(i64 %a, i64 %b) {
+; CHECK-LABEL: urcras16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    urcras16 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.riscv.urcras16.i64(i64 %a, i64 %b)
+  ret i64 %0
+}
+
+declare i64 @llvm.riscv.urcras16.i64(i64, i64)
+
+define i64 @v_urcras16(i64 %a.coerce, i64 %b.coerce) {
+; CHECK-LABEL: v_urcras16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    urcras16 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = bitcast i64 %a.coerce to <4 x i16>
+  %1 = bitcast i64 %b.coerce to <4 x i16>
+  %2 = tail call <4 x i16> @llvm.riscv.urcras16.v4i16(<4 x i16> %0, <4 x i16> %1)
+  %3 = bitcast <4 x i16> %2 to i64
+  ret i64 %3
+}
+
+declare <4 x i16> @llvm.riscv.urcras16.v4i16(<4 x i16>, <4 x i16>)
+
+define i64 @urcrsa16(i64 %a, i64 %b) {
+; CHECK-LABEL: urcrsa16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    urcrsa16 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.riscv.urcrsa16.i64(i64 %a, i64 %b)
+  ret i64 %0
+}
+
+declare i64 @llvm.riscv.urcrsa16.i64(i64, i64)
+
+define i64 @v_urcrsa16(i64 %a.coerce, i64 %b.coerce) {
+; CHECK-LABEL: v_urcrsa16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    urcrsa16 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = bitcast i64 %a.coerce to <4 x i16>
+  %1 = bitcast i64 %b.coerce to <4 x i16>
+  %2 = tail call <4 x i16> @llvm.riscv.urcrsa16.v4i16(<4 x i16> %0, <4 x i16> %1)
+  %3 = bitcast <4 x i16> %2 to i64
+  ret i64 %3
+}
+
+declare <4 x i16> @llvm.riscv.urcrsa16.v4i16(<4 x i16>, <4 x i16>)
+
+define i64 @urstas16(i64 %a, i64 %b) {
+; CHECK-LABEL: urstas16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    urcras16 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.riscv.urstas16.i64(i64 %a, i64 %b)
+  ret i64 %0
+}
+
+declare i64 @llvm.riscv.urstas16.i64(i64, i64)
+
+define i64 @v_urstas16(i64 %a.coerce, i64 %b.coerce) {
+; CHECK-LABEL: v_urstas16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    urcras16 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = bitcast i64 %a.coerce to <4 x i16>
+  %1 = bitcast i64 %b.coerce to <4 x i16>
+  %2 = tail call <4 x i16> @llvm.riscv.urstas16.v4i16(<4 x i16> %0, <4 x i16> %1)
+  %3 = bitcast <4 x i16> %2 to i64
+  ret i64 %3
+}
+
+declare <4 x i16> @llvm.riscv.urstas16.v4i16(<4 x i16>, <4 x i16>)
+
+define i64 @urstsa16(i64 %a, i64 %b) {
+; CHECK-LABEL: urstsa16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    urcrsa16 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.riscv.urstsa16.i64(i64 %a, i64 %b)
+  ret i64 %0
+}
+
+declare i64 @llvm.riscv.urstsa16.i64(i64, i64)
+
+define i64 @v_urstsa16(i64 %a.coerce, i64 %b.coerce) {
+; CHECK-LABEL: v_urstsa16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    urcrsa16 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = bitcast i64 %a.coerce to <4 x i16>
+  %1 = bitcast i64 %b.coerce to <4 x i16>
+  %2 = tail call <4 x i16> @llvm.riscv.urstsa16.v4i16(<4 x i16> %0, <4 x i16> %1)
+  %3 = bitcast <4 x i16> %2 to i64
+  ret i64 %3
+}
+
+declare <4 x i16> @llvm.riscv.urstsa16.v4i16(<4 x i16>, <4 x i16>)
+
+define i64 @ursub8(i64 %a, i64 %b) {
+; CHECK-LABEL: ursub8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ursub8 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.riscv.ursub8.i64(i64 %a, i64 %b)
+  ret i64 %0
+}
+
+declare i64 @llvm.riscv.ursub8.i64(i64, i64)
+
+define i64 @v_ursub8(i64 %a.coerce, i64 %b.coerce) {
+; CHECK-LABEL: v_ursub8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ursub8 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = bitcast i64 %a.coerce to <8 x i8>
+  %1 = bitcast i64 %b.coerce to <8 x i8>
+  %2 = tail call <8 x i8> @llvm.riscv.ursub8.v8i8(<8 x i8> %0, <8 x i8> %1)
+  %3 = bitcast <8 x i8> %2 to i64
+  ret i64 %3
+}
+
+declare <8 x i8> @llvm.riscv.ursub8.v8i8(<8 x i8>, <8 x i8>)
+
+define i64 @ursub16(i64 %a, i64 %b) {
+; CHECK-LABEL: ursub16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ursub16 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.riscv.ursub16.i64(i64 %a, i64 %b)
+  ret i64 %0
+}
+
+declare i64 @llvm.riscv.ursub16.i64(i64, i64)
+
+define i64 @v_ursub16(i64 %a.coerce, i64 %b.coerce) {
+; CHECK-LABEL: v_ursub16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ursub16 a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %0 = bitcast i64 %a.coerce to <4 x i16>
+  %1 = bitcast i64 %b.coerce to <4 x i16>
+  %2 = tail call <4 x i16> @llvm.riscv.ursub16.v4i16(<4 x i16> %0, <4 x i16> %1)
+  %3 = bitcast <4 x i16> %2 to i64
+  ret i64 %3
+}
+
+declare <4 x i16> @llvm.riscv.ursub16.v4i16(<4 x i16>, <4 x i16>)
+
+define i64 @ursubw(i32 signext %a, i32 signext %b) {
+; CHECK-LABEL: ursubw:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
+; CHECK-NEXT:    slli a1, a1, 32
+; CHECK-NEXT:    srli a1, a1, 32
+; CHECK-NEXT:    ursubw a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %conv = zext i32 %a to i64
+  %conv1 = zext i32 %b to i64
+  %0 = tail call i64 @llvm.riscv.ursubw.i64(i64 %conv, i64 %conv1)
+  ret i64 %0
+}
+
+declare i64 @llvm.riscv.ursubw.i64(i64, i64)
+
+define i64 @zunpkd810(i64 %a) {
+; CHECK-LABEL: zunpkd810:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    zunpkd810 a0, a0
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.riscv.zunpkd810.i64(i64 %a)
+  ret i64 %0
+}
+
+declare i64 @llvm.riscv.zunpkd810.i64(i64)
+
+define i64 @v_zunpkd810(i64 %a.coerce) {
+; CHECK-LABEL: v_zunpkd810:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    zunpkd810 a0, a0
+; CHECK-NEXT:    ret
+entry:
+  %0 = bitcast i64 %a.coerce to <8 x i8>
+  %1 = tail call <4 x i16> @llvm.riscv.v.zunpkd810.v4i16(<8 x i8> %0)
+  %2 = bitcast <4 x i16> %1 to i64
+  ret i64 %2
+}
+
+declare <4 x i16> @llvm.riscv.v.zunpkd810.v4i16(<8 x i8>)
+
+define i64 @zunpkd820(i64 %a) {
+; CHECK-LABEL: zunpkd820:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    zunpkd820 a0, a0
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.riscv.zunpkd820.i64(i64 %a)
+  ret i64 %0
+}
+
+declare i64 @llvm.riscv.zunpkd820.i64(i64)
+
+define i64 @v_zunpkd820(i64 %a.coerce) {
+; CHECK-LABEL: v_zunpkd820:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    zunpkd820 a0, a0
+; CHECK-NEXT:    ret
+entry:
+  %0 = bitcast i64 %a.coerce to <8 x i8>
+  %1 = tail call <4 x i16> @llvm.riscv.v.zunpkd820.v4i16(<8 x i8> %0)
+  %2 = bitcast <4 x i16> %1 to i64
+  ret i64 %2
+}
+
+declare <4 x i16> @llvm.riscv.v.zunpkd820.v4i16(<8 x i8>)
+
+define i64 @zunpkd830(i64 %a) {
+; CHECK-LABEL: zunpkd830:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    zunpkd830 a0, a0
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.riscv.zunpkd830.i64(i64 %a)
+  ret i64 %0
+}
+
+declare i64 @llvm.riscv.zunpkd830.i64(i64)
+
+define i64 @v_zunpkd830(i64 %a.coerce) {
+; CHECK-LABEL: v_zunpkd830:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    zunpkd830 a0, a0
+; CHECK-NEXT:    ret
+entry:
+  %0 = bitcast i64 %a.coerce to <8 x i8>
+  %1 = tail call <4 x i16> @llvm.riscv.v.zunpkd830.v4i16(<8 x i8> %0)
+  %2 = bitcast <4 x i16> %1 to i64
+  ret i64 %2
+}
+
+declare <4 x i16> @llvm.riscv.v.zunpkd830.v4i16(<8 x i8>)
+
+define i64 @zunpkd831(i64 %a) {
+; CHECK-LABEL: zunpkd831:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    zunpkd831 a0, a0
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.riscv.zunpkd831.i64(i64 %a)
+  ret i64 %0
+}
+
+declare i64 @llvm.riscv.zunpkd831.i64(i64)
+
+define i64 @v_zunpkd831(i64 %a.coerce) {
+; CHECK-LABEL: v_zunpkd831:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    zunpkd831 a0, a0
+; CHECK-NEXT:    ret
+entry:
+  %0 = bitcast i64 %a.coerce to <8 x i8>
+  %1 = tail call <4 x i16> @llvm.riscv.v.zunpkd831.v4i16(<8 x i8> %0)
+  %2 = bitcast <4 x i16> %1 to i64
+  ret i64 %2
+}
+
+declare <4 x i16> @llvm.riscv.v.zunpkd831.v4i16(<8 x i8>)
+
+define i64 @zunpkd832(i64 %a) {
+; CHECK-LABEL: zunpkd832:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    zunpkd832 a0, a0
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.riscv.zunpkd832.i64(i64 %a)
+  ret i64 %0
+}
+
+declare i64 @llvm.riscv.zunpkd832.i64(i64)
+
+define i64 @v_zunpkd832(i64 %a.coerce) {
+; CHECK-LABEL: v_zunpkd832:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    zunpkd832 a0, a0
+; CHECK-NEXT:    ret
+entry:
+  %0 = bitcast i64 %a.coerce to <8 x i8>
+  %1 = tail call <4 x i16> @llvm.riscv.v.zunpkd832.v4i16(<8 x i8> %0)
+  %2 = bitcast <4 x i16> %1 to i64
+  ret i64 %2
+}
+
+declare <4 x i16> @llvm.riscv.v.zunpkd832.v4i16(<8 x i8>)