Index: clang/include/clang/Basic/arm_sve.td =================================================================== --- clang/include/clang/Basic/arm_sve.td +++ clang/include/clang/Basic/arm_sve.td @@ -253,6 +253,26 @@ : Inst { } +// SInstBF16: Wrapper around SInst that also defines a bfloat16 instruction +// guarded by __ARM_FEATURE_SVE_BF16. Type signature matches SInst but has an +// extra arg 'arch_features', a list of strings used to construct the +// ArchGuard. This is useful for defining intrinsics that are guarded on one or +// more arch features. +multiclass SInstBF16 ft = [], list ch = [], + list arch_features = []> { + let ArchGuard = !foldl("", arch_features, lhs, rhs, + lhs # !if(!ne(lhs, ""), " && ", "") # rhs) in { + def : SInst; + } + let ArchGuard = !foldl("", !listconcat(arch_features, + ["__ARM_FEATURE_SVE_BF16"]), + lhs, rhs, + lhs # !if(!ne(lhs, ""), " && ", "") # rhs) in { + def : SInst; + } +} + // MInst: Instructions which access memory class MInst f, MemEltType met = MemEltTyDefault, string i = ""> @@ -498,11 +518,7 @@ } // Load one quadword and replicate (scalar base) -def SVLD1RQ : SInst<"svld1rq[_{2}]", "dPc", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ld1rq">; - -let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in { - def SVLD1RQ_BF : SInst<"svld1rq[_{2}]", "dPc", "b", MergeNone, "aarch64_sve_ld1rq">; -} +defm SVLD1RQ : SInstBF16<"svld1rq[_{2}]", "dPc", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ld1rq">; multiclass StructLoad { def : SInst; @@ -522,12 +538,7 @@ defm SVLD4_VNUM : StructLoad<"svld4_vnum[_{2}]", "4Pcl", "aarch64_sve_ld4">; // Load one octoword and replicate (scalar base) -let ArchGuard = "defined(__ARM_FEATURE_SVE_MATMUL_FP64)" in { - def SVLD1RO : SInst<"svld1ro[_{2}]", "dPc", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ld1ro">; -} -let ArchGuard = "defined(__ARM_FEATURE_SVE_MATMUL_FP64) && defined(__ARM_FEATURE_SVE_BF16)" in { - def SVLD1RO_BF16 : SInst<"svld1ro[_{2}]", "dPc", "b", MergeNone, "aarch64_sve_ld1ro">; -} +defm SVLD1RO : SInstBF16<"svld1ro[_{2}]", "dPc", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ld1ro", [], [], ["__ARM_FEATURE_SVE_MATMUL_FP64"]>; let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in { def SVBFDOT : SInst<"svbfdot[_{0}]", "MMdd", "b", MergeNone, "aarch64_sve_bfdot", [IsOverloadNone]>; @@ -1170,36 +1181,24 @@ // instruction's immediate. def SVDUP_LANE : SInst<"svdup_lane[_{d}]", "ddL", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tbl">; def SVDUPQ_LANE : SInst<"svdupq_lane[_{d}]", "ddn", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_dupq_lane">; -def SVEXT : SInst<"svext[_{d}]", "dddi", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ext", [], [ImmCheck<2, ImmCheckExtract, 1>]>; def SVLASTA : SInst<"svlasta[_{d}]", "sPd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_lasta">; def SVLASTB : SInst<"svlastb[_{d}]", "sPd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_lastb">; -def SVREV : SInst<"svrev[_{d}]", "dd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_rev">; -def SVSEL : SInst<"svsel[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_sel">; -def SVSPLICE : SInst<"svsplice[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_splice">; def SVTBL : SInst<"svtbl[_{d}]", "ddu", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tbl">; -def SVTRN1 : SInst<"svtrn1[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn1">; -def SVTRN2 : SInst<"svtrn2[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn2">; def SVUNPKHI_S : SInst<"svunpkhi[_{d}]", "dh", "sil", MergeNone, "aarch64_sve_sunpkhi">; def SVUNPKHI_U : SInst<"svunpkhi[_{d}]", "dh", "UsUiUl", MergeNone, "aarch64_sve_uunpkhi">; def SVUNPKLO_S : SInst<"svunpklo[_{d}]", "dh", "sil", MergeNone, "aarch64_sve_sunpklo">; def SVUNPKLO_U : SInst<"svunpklo[_{d}]", "dh", "UsUiUl", MergeNone, "aarch64_sve_uunpklo">; -def SVUZP1 : SInst<"svuzp1[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_uzp1">; -def SVUZP2 : SInst<"svuzp2[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_uzp2">; -def SVZIP1 : SInst<"svzip1[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_zip1">; -def SVZIP2 : SInst<"svzip2[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_zip2">; -let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in { -def SVEXT_BF16 : SInst<"svext[_{d}]", "dddi", "b", MergeNone, "aarch64_sve_ext", [], [ImmCheck<2, ImmCheckExtract, 1>]>; -def SVREV_BF16 : SInst<"svrev[_{d}]", "dd", "b", MergeNone, "aarch64_sve_rev">; -def SVSEL_BF16 : SInst<"svsel[_{d}]", "dPdd", "b", MergeNone, "aarch64_sve_sel">; -def SVSPLICE_BF16 : SInst<"svsplice[_{d}]", "dPdd", "b", MergeNone, "aarch64_sve_splice">; -def SVTRN1_BF16 : SInst<"svtrn1[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_trn1">; -def SVTRN2_BF16 : SInst<"svtrn2[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_trn2">; -def SVUZP1_BF16 : SInst<"svuzp1[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_uzp1">; -def SVUZP2_BF16 : SInst<"svuzp2[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_uzp2">; -def SVZIP1_BF16 : SInst<"svzip1[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_zip1">; -def SVZIP2_BF16 : SInst<"svzip2[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_zip2">; -} +defm SVEXT : SInstBF16<"svext[_{d}]", "dddi", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ext", [], [ImmCheck<2, ImmCheckExtract, 1>]>; +defm SVREV : SInstBF16<"svrev[_{d}]", "dd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_rev">; +defm SVSEL : SInstBF16<"svsel[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_sel">; +defm SVSPLICE : SInstBF16<"svsplice[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_splice">; +defm SVTRN1 : SInstBF16<"svtrn1[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn1">; +defm SVTRN2 : SInstBF16<"svtrn2[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn2">; +defm SVUZP1 : SInstBF16<"svuzp1[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_uzp1">; +defm SVUZP2 : SInstBF16<"svuzp2[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_uzp2">; +defm SVZIP1 : SInstBF16<"svzip1[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_zip1">; +defm SVZIP2 : SInstBF16<"svzip2[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_zip2">; def SVREV_B : SInst<"svrev_{d}", "PP", "PcPsPiPl", MergeNone, "aarch64_sve_rev">; def SVSEL_B : SInst<"svsel[_b]", "PPPP", "Pc", MergeNone, "aarch64_sve_sel">; @@ -1280,11 +1279,8 @@ def SVCNTD : SInst<"svcntd", "n", "", MergeNone, "aarch64_sve_cntd", [IsAppendSVALL, IsOverloadNone]>; def SVCNTP : SInst<"svcntp_{d}", "nPP", "PcPsPiPl", MergeNone, "aarch64_sve_cntp">; -def SVLEN : SInst<"svlen[_{d}]", "nd", "csilUcUsUiUlhfd", MergeNone>; -let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in { -def SVLEN_BF16 : SInst<"svlen[_{d}]", "nd", "b", MergeNone>; -} +defm SVLEN : SInstBF16<"svlen[_{d}]", "nd", "csilUcUsUiUlhfd", MergeNone>; //////////////////////////////////////////////////////////////////////////////// // Saturating scalar arithmetic @@ -1368,22 +1364,14 @@ let ArchGuard = "defined(__ARM_FEATURE_SVE_MATMUL_FP64)" in { def SVMLLA_F64 : SInst<"svmmla[_f64]", "dddd","d", MergeNone, "aarch64_sve_fmmla">; -def SVTRN1Q : SInst<"svtrn1q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn1q">; -def SVTRN2Q : SInst<"svtrn2q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn2q">; -def SVUZP1Q : SInst<"svuzp1q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_uzp1q">; -def SVUZP2Q : SInst<"svuzp2q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_uzp2q">; -def SVZIP1Q : SInst<"svzip1q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_zip1q">; -def SVZIP2Q : SInst<"svzip2q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_zip2q">; } -let ArchGuard = "defined(__ARM_FEATURE_SVE_MATMUL_FP64) && defined(__ARM_FEATURE_SVE_BF16)" in { -def SVTRN1Q_BF16 : SInst<"svtrn1q[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_trn1q">; -def SVTRN2Q_BF16 : SInst<"svtrn2q[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_trn2q">; -def SVUZP1Q_BF16 : SInst<"svuzp1q[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_uzp1q">; -def SVUZP2Q_BF16 : SInst<"svuzp2q[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_uzp2q">; -def SVZIP1Q_BF16 : SInst<"svzip1q[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_zip1q">; -def SVZIP2Q_BF16 : SInst<"svzip2q[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_zip2q">; -} +defm SVTRN1Q : SInstBF16<"svtrn1q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn1q", [], [], ["__ARM_FEATURE_SVE_MATMUL_FP64"]>; +defm SVTRN2Q : SInstBF16<"svtrn2q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn2q", [], [], ["__ARM_FEATURE_SVE_MATMUL_FP64"]>; +defm SVUZP1Q : SInstBF16<"svuzp1q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_uzp1q", [], [], ["__ARM_FEATURE_SVE_MATMUL_FP64"]>; +defm SVUZP2Q : SInstBF16<"svuzp2q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_uzp2q", [], [], ["__ARM_FEATURE_SVE_MATMUL_FP64"]>; +defm SVZIP1Q : SInstBF16<"svzip1q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_zip1q", [], [], ["__ARM_FEATURE_SVE_MATMUL_FP64"]>; +defm SVZIP2Q : SInstBF16<"svzip2q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_zip2q", [], [], ["__ARM_FEATURE_SVE_MATMUL_FP64"]>; //////////////////////////////////////////////////////////////////////////////// // Vector creation @@ -1953,20 +1941,16 @@ // SVE2 - Contiguous conflict detection let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { def SVWHILERW_B : SInst<"svwhilerw[_{1}]", "Pcc", "cUc", MergeNone, "aarch64_sve_whilerw_b", [IsOverloadWhileRW]>; -def SVWHILERW_H : SInst<"svwhilerw[_{1}]", "Pcc", "sUsh", MergeNone, "aarch64_sve_whilerw_h", [IsOverloadWhileRW]>; def SVWHILERW_S : SInst<"svwhilerw[_{1}]", "Pcc", "iUif", MergeNone, "aarch64_sve_whilerw_s", [IsOverloadWhileRW]>; def SVWHILERW_D : SInst<"svwhilerw[_{1}]", "Pcc", "lUld", MergeNone, "aarch64_sve_whilerw_d", [IsOverloadWhileRW]>; def SVWHILEWR_B : SInst<"svwhilewr[_{1}]", "Pcc", "cUc", MergeNone, "aarch64_sve_whilewr_b", [IsOverloadWhileRW]>; -def SVWHILEWR_H : SInst<"svwhilewr[_{1}]", "Pcc", "sUsh", MergeNone, "aarch64_sve_whilewr_h", [IsOverloadWhileRW]>; def SVWHILEWR_S : SInst<"svwhilewr[_{1}]", "Pcc", "iUif", MergeNone, "aarch64_sve_whilewr_s", [IsOverloadWhileRW]>; def SVWHILEWR_D : SInst<"svwhilewr[_{1}]", "Pcc", "lUld", MergeNone, "aarch64_sve_whilewr_d", [IsOverloadWhileRW]>; } -let ArchGuard = "defined(__ARM_FEATURE_SVE2) && defined(__ARM_FEATURE_SVE_BF16)" in { -def SVWHILERW_H_BF16 : SInst<"svwhilerw[_{1}]", "Pcc", "b", MergeNone, "aarch64_sve_whilerw_h", [IsOverloadWhileRW]>; -def SVWHILEWR_H_BF16 : SInst<"svwhilewr[_{1}]", "Pcc", "b", MergeNone, "aarch64_sve_whilewr_h", [IsOverloadWhileRW]>; -} +defm SVWHILERW_H : SInstBF16<"svwhilerw[_{1}]", "Pcc", "sUsh", MergeNone, "aarch64_sve_whilerw_h", [IsOverloadWhileRW], [], ["__ARM_FEATURE_SVE2"]>; +defm SVWHILEWR_H : SInstBF16<"svwhilewr[_{1}]", "Pcc", "sUsh", MergeNone, "aarch64_sve_whilewr_h", [IsOverloadWhileRW], [], ["__ARM_FEATURE_SVE2"]>; //////////////////////////////////////////////////////////////////////////////// // SVE2 - Extended table lookup/permute