diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def --- a/clang/include/clang/Basic/BuiltinsAArch64.def +++ b/clang/include/clang/Basic/BuiltinsAArch64.def @@ -269,4 +269,5 @@ #undef BUILTIN #undef LANGBUILTIN +#undef TARGET_BUILTIN #undef TARGET_HEADER_BUILTIN diff --git a/clang/include/clang/Basic/BuiltinsARM.def b/clang/include/clang/Basic/BuiltinsARM.def --- a/clang/include/clang/Basic/BuiltinsARM.def +++ b/clang/include/clang/Basic/BuiltinsARM.def @@ -343,4 +343,5 @@ #undef BUILTIN #undef LANGBUILTIN +#undef TARGET_BUILTIN #undef TARGET_HEADER_BUILTIN diff --git a/clang/include/clang/Basic/BuiltinsNEON.def b/clang/include/clang/Basic/BuiltinsNEON.def --- a/clang/include/clang/Basic/BuiltinsNEON.def +++ b/clang/include/clang/Basic/BuiltinsNEON.def @@ -19,3 +19,4 @@ #undef GET_NEON_BUILTINS #undef BUILTIN +#undef TARGET_BUILTIN diff --git a/clang/include/clang/Basic/BuiltinsSME.def b/clang/include/clang/Basic/BuiltinsSME.def new file mode 100644 --- /dev/null +++ b/clang/include/clang/Basic/BuiltinsSME.def @@ -0,0 +1,21 @@ +//===--- BuiltinsSME.def - SME Builtin function database --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the SME-specific builtin function database. Users of +// this file must define the BUILTIN macro to make use of this information. +// +//===----------------------------------------------------------------------===// + +// The format of this database matches clang/Basic/Builtins.def. + +#define GET_SME_BUILTINS +#include "clang/Basic/arm_sme_builtins.inc" +#undef GET_SME_BUILTINS + +#undef BUILTIN +#undef TARGET_BUILTIN diff --git a/clang/include/clang/Basic/CMakeLists.txt b/clang/include/clang/Basic/CMakeLists.txt --- a/clang/include/clang/Basic/CMakeLists.txt +++ b/clang/include/clang/Basic/CMakeLists.txt @@ -72,6 +72,15 @@ clang_tablegen(arm_sve_sema_rangechecks.inc -gen-arm-sve-sema-rangechecks SOURCE arm_sve.td TARGET ClangARMSveSemaRangeChecks) +clang_tablegen(arm_sme_builtins.inc -gen-arm-sme-builtins + SOURCE arm_sme.td + TARGET ClangARMSmeBuiltins) +clang_tablegen(arm_sme_builtin_cg.inc -gen-arm-sme-builtin-codegen + SOURCE arm_sme.td + TARGET ClangARMSmeBuiltinCG) +clang_tablegen(arm_sme_sema_rangechecks.inc -gen-arm-sme-sema-rangechecks + SOURCE arm_sme.td + TARGET ClangARMSmeSemaRangeChecks) clang_tablegen(arm_cde_builtins.inc -gen-arm-cde-builtin-def SOURCE arm_cde.td TARGET ClangARMCdeBuiltinsDef) diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h --- a/clang/include/clang/Basic/TargetBuiltins.h +++ b/clang/include/clang/Basic/TargetBuiltins.h @@ -48,11 +48,22 @@ enum { LastNEONBuiltin = NEON::FirstTSBuiltin - 1, #define BUILTIN(ID, TYPE, ATTRS) BI##ID, +#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BI##ID, #include "clang/Basic/BuiltinsSVE.def" FirstTSBuiltin, }; } + namespace SME { + enum { + LastSVEBuiltin = SVE::FirstTSBuiltin - 1, +#define BUILTIN(ID, TYPE, ATTRS) BI##ID, +#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BI##ID, +#include "clang/Basic/BuiltinsSME.def" + FirstTSBuiltin, + }; + } + /// AArch64 builtins namespace AArch64 { enum { @@ -60,6 +71,8 @@ LastNEONBuiltin = NEON::FirstTSBuiltin - 1, FirstSVEBuiltin = NEON::FirstTSBuiltin, LastSVEBuiltin = SVE::FirstTSBuiltin - 1, + FirstSMEBuiltin = SVE::FirstTSBuiltin, + LastSMEBuiltin = SME::FirstTSBuiltin - 1, #define BUILTIN(ID, TYPE, ATTRS) BI##ID, #include "clang/Basic/BuiltinsAArch64.def" LastTSBuiltin diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td new file mode 100644 --- /dev/null +++ b/clang/include/clang/Basic/arm_sme.td @@ -0,0 +1,74 @@ +//===--- arm_sme.td - ARM SME compiler interface ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the TableGen definitions from which the ARM SME header +// file will be generated. See: +// +// https://developer.arm.com/architectures/system-architectures/software-standards/acle +// +//===----------------------------------------------------------------------===// + +include "arm_sve_sme_incl.td" + +//////////////////////////////////////////////////////////////////////////////// +// Loads + +multiclass ZALoad ch> { + let TargetGuard = "sme" in { + def NAME # _H : MInst<"svld1_hor_" # n_suffix, "vimiPQ", t, + [IsLoad, IsOverloadNone, IsStreaming, IsSharedZA], + MemEltTyDefault, i_prefix # "_horiz", ch>; + + def NAME # _H_VNUM : MInst<"svld1_hor_vnum_" # n_suffix, "vimiPQl", t, + [IsLoad, IsOverloadNone, IsStreaming, IsSharedZA], + MemEltTyDefault, i_prefix # "_horiz", ch>; + + def NAME # _V : MInst<"svld1_ver_" # n_suffix, "vimiPQ", t, + [IsLoad, IsOverloadNone, IsStreaming, IsSharedZA], + MemEltTyDefault, i_prefix # "_vert", ch>; + + def NAME # _V_VNUM : MInst<"svld1_ver_vnum_" # n_suffix, "vimiPQl", t, + [IsLoad, IsOverloadNone, IsStreaming, IsSharedZA], + MemEltTyDefault, i_prefix # "_vert", ch>; + } +} + +defm SVLD1_ZA8 : ZALoad<"za8", "c", "aarch64_sme_ld1b", [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_15>]>; +defm SVLD1_ZA16 : ZALoad<"za16", "s", "aarch64_sme_ld1h", [ImmCheck<0, ImmCheck0_1>, ImmCheck<2, ImmCheck0_7>]>; +defm SVLD1_ZA32 : ZALoad<"za32", "i", "aarch64_sme_ld1w", [ImmCheck<0, ImmCheck0_3>, ImmCheck<2, ImmCheck0_3>]>; +defm SVLD1_ZA64 : ZALoad<"za64", "l", "aarch64_sme_ld1d", [ImmCheck<0, ImmCheck0_7>, ImmCheck<2, ImmCheck0_1>]>; +defm SVLD1_ZA128 : ZALoad<"za128", "q", "aarch64_sme_ld1q", [ImmCheck<0, ImmCheck0_15>, ImmCheck<2, ImmCheck0_0>]>; + +//////////////////////////////////////////////////////////////////////////////// +// Stores + +multiclass ZAStore ch> { + let TargetGuard = "sme" in { + def NAME # _H : MInst<"svst1_hor_" # n_suffix, "vimiP%", t, + [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA], + MemEltTyDefault, i_prefix # "_horiz", ch>; + + def NAME # _H_VNUM : MInst<"svst1_hor_vnum_" # n_suffix, "vimiP%l", t, + [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA], + MemEltTyDefault, i_prefix # "_horiz", ch>; + + def NAME # _V : MInst<"svst1_ver_" # n_suffix, "vimiP%", t, + [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA], + MemEltTyDefault, i_prefix # "_vert", ch>; + + def NAME # _V_VNUM : MInst<"svst1_ver_vnum_" # n_suffix, "vimiP%l", t, + [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA], + MemEltTyDefault, i_prefix # "_vert", ch>; + } +} + +defm SVST1_ZA8 : ZAStore<"za8", "c", "aarch64_sme_st1b", [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_15>]>; +defm SVST1_ZA16 : ZAStore<"za16", "s", "aarch64_sme_st1h", [ImmCheck<0, ImmCheck0_1>, ImmCheck<2, ImmCheck0_7>]>; +defm SVST1_ZA32 : ZAStore<"za32", "i", "aarch64_sme_st1w", [ImmCheck<0, ImmCheck0_3>, ImmCheck<2, ImmCheck0_3>]>; +defm SVST1_ZA64 : ZAStore<"za64", "l", "aarch64_sme_st1d", [ImmCheck<0, ImmCheck0_7>, ImmCheck<2, ImmCheck0_1>]>; +defm SVST1_ZA128 : ZAStore<"za128", "q", "aarch64_sme_st1q", [ImmCheck<0, ImmCheck0_15>, ImmCheck<2, ImmCheck0_0>]>; diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -13,253 +13,7 @@ // //===----------------------------------------------------------------------===// -//===----------------------------------------------------------------------===// -// Instruction definitions -//===----------------------------------------------------------------------===// -// Every intrinsic subclasses "Inst". An intrinsic has a name, a prototype and -// a sequence of typespecs. -// -// The name is the base name of the intrinsic, for example "svld1". This is -// then mangled by the tblgen backend to add type information ("svld1_s16"). -// -// A typespec is a sequence of uppercase characters (modifiers) followed by one -// lowercase character. A typespec encodes a particular "base type" of the -// intrinsic. -// -// An example typespec is "Us" - unsigned short - svuint16_t. The available -// typespec codes are given below. -// -// The string given to an Inst class is a sequence of typespecs. The intrinsic -// is instantiated for every typespec in the sequence. For example "sdUsUd". -// -// The prototype is a string that defines the return type of the intrinsic -// and the type of each argument. The return type and every argument gets a -// "modifier" that can change in some way the "base type" of the intrinsic. -// -// The modifier 'd' means "default" and does not modify the base type in any -// way. The available modifiers are given below. -// -// Typespecs -// --------- -// c: char -// s: short -// i: int -// l: long -// f: float -// h: half-float -// d: double -// b: bfloat - -// Typespec modifiers -// ------------------ -// P: boolean -// U: unsigned - -// Prototype modifiers -// ------------------- -// prototype: return (arg, arg, ...) -// -// 2,3,4: array of default vectors -// v: void -// x: vector of signed integers -// u: vector of unsigned integers -// d: default -// c: const pointer type -// P: predicate type -// s: scalar of element type -// a: scalar of element type (splat to vector type) -// R: scalar of 1/2 width element type (splat to vector type) -// r: scalar of 1/4 width element type (splat to vector type) -// @: unsigned scalar of 1/4 width element type (splat to vector type) -// e: 1/2 width unsigned elements, 2x element count -// b: 1/4 width unsigned elements, 4x element count -// h: 1/2 width elements, 2x element count -// q: 1/4 width elements, 4x element count -// o: 4x width elements, 1/4 element count -// -// w: vector of element type promoted to 64bits, vector maintains -// signedness of its element type. -// f: element type promoted to uint64_t (splat to vector type) -// j: element type promoted to 64bits (splat to vector type) -// K: element type bitcast to a signed integer (splat to vector type) -// L: element type bitcast to an unsigned integer (splat to vector type) -// -// i: constant uint64_t -// k: int32_t -// l: int64_t -// m: uint32_t -// n: uint64_t - -// t: svint32_t -// z: svuint32_t -// g: svuint64_t -// O: svfloat16_t -// M: svfloat32_t -// N: svfloat64_t - -// J: Prefetch type (sv_prfop) -// A: pointer to int8_t -// B: pointer to int16_t -// C: pointer to int32_t -// D: pointer to int64_t - -// E: pointer to uint8_t -// F: pointer to uint16_t -// G: pointer to uint32_t -// H: pointer to uint64_t - -// Q: const pointer to void - -// S: const pointer to int8_t -// T: const pointer to int16_t -// U: const pointer to int32_t -// V: const pointer to int64_t -// -// W: const pointer to uint8_t -// X: const pointer to uint16_t -// Y: const pointer to uint32_t -// Z: const pointer to uint64_t - -class MergeType { - int Value = val; - string Suffix = suffix; -} -def MergeNone : MergeType<0>; -def MergeAny : MergeType<1, "_x">; -def MergeOp1 : MergeType<2, "_m">; -def MergeZero : MergeType<3, "_z">; -def MergeAnyExp : MergeType<4, "_x">; // Use merged builtin with explicit -def MergeZeroExp : MergeType<5, "_z">; // generation of its inactive argument. - -class EltType { - int Value = val; -} -def EltTyInvalid : EltType<0>; -def EltTyInt8 : EltType<1>; -def EltTyInt16 : EltType<2>; -def EltTyInt32 : EltType<3>; -def EltTyInt64 : EltType<4>; -def EltTyFloat16 : EltType<5>; -def EltTyFloat32 : EltType<6>; -def EltTyFloat64 : EltType<7>; -def EltTyBool8 : EltType<8>; -def EltTyBool16 : EltType<9>; -def EltTyBool32 : EltType<10>; -def EltTyBool64 : EltType<11>; -def EltTyBFloat16 : EltType<12>; - -class MemEltType { - int Value = val; -} -def MemEltTyDefault : MemEltType<0>; -def MemEltTyInt8 : MemEltType<1>; -def MemEltTyInt16 : MemEltType<2>; -def MemEltTyInt32 : MemEltType<3>; -def MemEltTyInt64 : MemEltType<4>; - -class FlagType { - int Value = val; -} - -// These must be kept in sync with the flags in utils/TableGen/SveEmitter.h -// and include/clang/Basic/TargetBuiltins.h -def NoFlags : FlagType<0x00000000>; -def FirstEltType : FlagType<0x00000001>; -// : : -// : : -def EltTypeMask : FlagType<0x0000000f>; -def FirstMemEltType : FlagType<0x00000010>; -// : : -// : : -def MemEltTypeMask : FlagType<0x00000070>; -def FirstMergeTypeMask : FlagType<0x00000080>; -// : : -// : : -def MergeTypeMask : FlagType<0x00000380>; -def FirstSplatOperand : FlagType<0x00000400>; -// : : -// These flags are used to specify which scalar operand -// needs to be duplicated/splatted into a vector. -// : : -def SplatOperandMask : FlagType<0x00001C00>; -def IsLoad : FlagType<0x00002000>; -def IsStore : FlagType<0x00004000>; -def IsGatherLoad : FlagType<0x00008000>; -def IsScatterStore : FlagType<0x00010000>; -def IsStructLoad : FlagType<0x00020000>; -def IsStructStore : FlagType<0x00040000>; -def IsZExtReturn : FlagType<0x00080000>; // Return value is sign-extend by default -def IsOverloadNone : FlagType<0x00100000>; // Intrinsic does not take any overloaded types. -def IsOverloadWhile : FlagType<0x00200000>; // Use {default type, typeof(operand1)} as overloaded types. -def IsOverloadWhileRW : FlagType<0x00400000>; // Use {pred(default type), typeof(operand0)} as overloaded types. -def IsOverloadCvt : FlagType<0x00800000>; // Use {typeof(operand0), typeof(last operand)} as overloaded types. -def OverloadKindMask : FlagType<0x00E00000>; // When the masked values are all '0', the default type is used as overload type. -def IsByteIndexed : FlagType<0x01000000>; -def IsAppendSVALL : FlagType<0x02000000>; // Appends SV_ALL as the last operand. -def IsInsertOp1SVALL : FlagType<0x04000000>; // Inserts SV_ALL as the second operand. -def IsPrefetch : FlagType<0x08000000>; // Contiguous prefetches. -def IsGatherPrefetch : FlagType<0x10000000>; -def ReverseCompare : FlagType<0x20000000>; // Compare operands must be swapped. -def ReverseUSDOT : FlagType<0x40000000>; // Unsigned/signed operands must be swapped. -def IsUndef : FlagType<0x80000000>; // Codegen `undef` of given type. -def IsTupleCreate : FlagType<0x100000000>; -def IsTupleGet : FlagType<0x200000000>; -def IsTupleSet : FlagType<0x400000000>; -def ReverseMergeAnyBinOp : FlagType<0x800000000>; // e.g. Implement SUBR_X using SUB_X. -def ReverseMergeAnyAccOp : FlagType<0x1000000000>; // e.g. Implement MSB_X using MLS_X. - -// These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h -class ImmCheckType { - int Value = val; -} -def ImmCheck0_31 : ImmCheckType<0>; // 0..31 (used for e.g. predicate patterns) -def ImmCheck1_16 : ImmCheckType<1>; // 1..16 -def ImmCheckExtract : ImmCheckType<2>; // 0..(2048/sizeinbits(elt) - 1) -def ImmCheckShiftRight : ImmCheckType<3>; // 1..sizeinbits(elt) -def ImmCheckShiftRightNarrow : ImmCheckType<4>; // 1..sizeinbits(elt)/2 -def ImmCheckShiftLeft : ImmCheckType<5>; // 0..(sizeinbits(elt) - 1) -def ImmCheck0_7 : ImmCheckType<6>; // 0..7 -def ImmCheckLaneIndex : ImmCheckType<7>; // 0..(128/(1*sizeinbits(elt)) - 1) -def ImmCheckLaneIndexCompRotate : ImmCheckType<8>; // 0..(128/(2*sizeinbits(elt)) - 1) -def ImmCheckLaneIndexDot : ImmCheckType<9>; // 0..(128/(4*sizeinbits(elt)) - 1) -def ImmCheckComplexRot90_270 : ImmCheckType<10>; // [90,270] -def ImmCheckComplexRotAll90 : ImmCheckType<11>; // [0, 90, 180,270] -def ImmCheck0_13 : ImmCheckType<12>; // 0..13 -def ImmCheck0_1 : ImmCheckType<13>; // 0..1 -def ImmCheck0_2 : ImmCheckType<14>; // 0..2 -def ImmCheck0_3 : ImmCheckType<15>; // 0..3 - -class ImmCheck { - int Arg = arg; - int EltSizeArg = eltSizeArg; - ImmCheckType Kind = kind; -} - -class Inst ft, list ch, MemEltType met> { - string Name = n; - string Prototype = p; - string Types = t; - string TargetGuard = "sve"; - int Merge = mt.Value; - string MergeSuffix = mt.Suffix; - string LLVMIntrinsic = i; - list Flags = ft; - list ImmChecks = ch; - int MemEltType = met.Value; -} - -// SInst: Instruction with signed/unsigned suffix (e.g., "s8", "u8") -class SInst ft = [], list ch = []> - : Inst { -} - -// MInst: Instructions which access memory -class MInst f, - MemEltType met = MemEltTyDefault, string i = ""> - : Inst { -} +include "arm_sve_sme_incl.td" //////////////////////////////////////////////////////////////////////////////// // Loads diff --git a/clang/include/clang/Basic/arm_sve_sme_incl.td b/clang/include/clang/Basic/arm_sve_sme_incl.td new file mode 100644 --- /dev/null +++ b/clang/include/clang/Basic/arm_sve_sme_incl.td @@ -0,0 +1,274 @@ +//===--- arm_sve_sme_incl.td - ARM SVE/SME compiler interface -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines common properites of TableGen definitions use for both +// SVE and SME intrinsics. +// +// https://developer.arm.com/architectures/system-architectures/software-standards/acle +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Instruction definitions +//===----------------------------------------------------------------------===// +// Every intrinsic subclasses "Inst". An intrinsic has a name, a prototype and +// a sequence of typespecs. +// +// The name is the base name of the intrinsic, for example "svld1". This is +// then mangled by the tblgen backend to add type information ("svld1_s16"). +// +// A typespec is a sequence of uppercase characters (modifiers) followed by one +// lowercase character. A typespec encodes a particular "base type" of the +// intrinsic. +// +// An example typespec is "Us" - unsigned short - svuint16_t. The available +// typespec codes are given below. +// +// The string given to an Inst class is a sequence of typespecs. The intrinsic +// is instantiated for every typespec in the sequence. For example "sdUsUd". +// +// The prototype is a string that defines the return type of the intrinsic +// and the type of each argument. The return type and every argument gets a +// "modifier" that can change in some way the "base type" of the intrinsic. +// +// The modifier 'd' means "default" and does not modify the base type in any +// way. The available modifiers are given below. +// +// Typespecs +// --------- +// c: char +// s: short +// i: int +// l: long +// q: int128_t +// f: float +// h: half-float +// d: double +// b: bfloat + +// Typespec modifiers +// ------------------ +// P: boolean +// U: unsigned + +// Prototype modifiers +// ------------------- +// prototype: return (arg, arg, ...) +// +// 2,3,4: array of default vectors +// v: void +// x: vector of signed integers +// u: vector of unsigned integers +// d: default +// c: const pointer type +// P: predicate type +// s: scalar of element type +// a: scalar of element type (splat to vector type) +// R: scalar of 1/2 width element type (splat to vector type) +// r: scalar of 1/4 width element type (splat to vector type) +// @: unsigned scalar of 1/4 width element type (splat to vector type) +// e: 1/2 width unsigned elements, 2x element count +// b: 1/4 width unsigned elements, 4x element count +// h: 1/2 width elements, 2x element count +// q: 1/4 width elements, 4x element count +// o: 4x width elements, 1/4 element count +// +// w: vector of element type promoted to 64bits, vector maintains +// signedness of its element type. +// f: element type promoted to uint64_t (splat to vector type) +// j: element type promoted to 64bits (splat to vector type) +// K: element type bitcast to a signed integer (splat to vector type) +// L: element type bitcast to an unsigned integer (splat to vector type) +// +// i: constant uint64_t +// k: int32_t +// l: int64_t +// m: uint32_t +// n: uint64_t + +// t: svint32_t +// z: svuint32_t +// g: svuint64_t +// O: svfloat16_t +// M: svfloat32_t +// N: svfloat64_t + +// J: Prefetch type (sv_prfop) + +// %: pointer to void + +// A: pointer to int8_t +// B: pointer to int16_t +// C: pointer to int32_t +// D: pointer to int64_t + +// E: pointer to uint8_t +// F: pointer to uint16_t +// G: pointer to uint32_t +// H: pointer to uint64_t + +// Q: const pointer to void + +// S: const pointer to int8_t +// T: const pointer to int16_t +// U: const pointer to int32_t +// V: const pointer to int64_t +// +// W: const pointer to uint8_t +// X: const pointer to uint16_t +// Y: const pointer to uint32_t +// Z: const pointer to uint64_t + +class MergeType { + int Value = val; + string Suffix = suffix; +} +def MergeNone : MergeType<0>; +def MergeAny : MergeType<1, "_x">; +def MergeOp1 : MergeType<2, "_m">; +def MergeZero : MergeType<3, "_z">; +def MergeAnyExp : MergeType<4, "_x">; // Use merged builtin with explicit +def MergeZeroExp : MergeType<5, "_z">; // generation of its inactive argument. + +class EltType { + int Value = val; +} +def EltTyInvalid : EltType<0>; +def EltTyInt8 : EltType<1>; +def EltTyInt16 : EltType<2>; +def EltTyInt32 : EltType<3>; +def EltTyInt64 : EltType<4>; +def EltTyInt128 : EltType<5>; +def EltTyFloat16 : EltType<6>; +def EltTyFloat32 : EltType<7>; +def EltTyFloat64 : EltType<8>; +def EltTyBool8 : EltType<9>; +def EltTyBool16 : EltType<10>; +def EltTyBool32 : EltType<11>; +def EltTyBool64 : EltType<12>; +def EltTyBFloat16 : EltType<13>; + +class MemEltType { + int Value = val; +} +def MemEltTyDefault : MemEltType<0>; +def MemEltTyInt8 : MemEltType<1>; +def MemEltTyInt16 : MemEltType<2>; +def MemEltTyInt32 : MemEltType<3>; +def MemEltTyInt64 : MemEltType<4>; + +class FlagType { + int Value = val; +} + +// These must be kept in sync with the flags in utils/TableGen/SveEmitter.h +// and include/clang/Basic/TargetBuiltins.h +def NoFlags : FlagType<0x00000000>; +def FirstEltType : FlagType<0x00000001>; +// : : +// : : +def EltTypeMask : FlagType<0x0000000f>; +def FirstMemEltType : FlagType<0x00000010>; +// : : +// : : +def MemEltTypeMask : FlagType<0x00000070>; +def FirstMergeTypeMask : FlagType<0x00000080>; +// : : +// : : +def MergeTypeMask : FlagType<0x00000380>; +def FirstSplatOperand : FlagType<0x00000400>; +// : : +// These flags are used to specify which scalar operand +// needs to be duplicated/splatted into a vector. +// : : +def SplatOperandMask : FlagType<0x00001C00>; +def IsLoad : FlagType<0x00002000>; +def IsStore : FlagType<0x00004000>; +def IsGatherLoad : FlagType<0x00008000>; +def IsScatterStore : FlagType<0x00010000>; +def IsStructLoad : FlagType<0x00020000>; +def IsStructStore : FlagType<0x00040000>; +def IsZExtReturn : FlagType<0x00080000>; // Return value is sign-extend by default +def IsOverloadNone : FlagType<0x00100000>; // Intrinsic does not take any overloaded types. +def IsOverloadWhile : FlagType<0x00200000>; // Use {default type, typeof(operand1)} as overloaded types. +def IsOverloadWhileRW : FlagType<0x00400000>; // Use {pred(default type), typeof(operand0)} as overloaded types. +def IsOverloadCvt : FlagType<0x00800000>; // Use {typeof(operand0), typeof(last operand)} as overloaded types. +def OverloadKindMask : FlagType<0x00E00000>; // When the masked values are all '0', the default type is used as overload type. +def IsByteIndexed : FlagType<0x01000000>; +def IsAppendSVALL : FlagType<0x02000000>; // Appends SV_ALL as the last operand. +def IsInsertOp1SVALL : FlagType<0x04000000>; // Inserts SV_ALL as the second operand. +def IsPrefetch : FlagType<0x08000000>; // Contiguous prefetches. +def IsGatherPrefetch : FlagType<0x10000000>; +def ReverseCompare : FlagType<0x20000000>; // Compare operands must be swapped. +def ReverseUSDOT : FlagType<0x40000000>; // Unsigned/signed operands must be swapped. +def IsUndef : FlagType<0x80000000>; // Codegen `undef` of given type. +def IsTupleCreate : FlagType<0x100000000>; +def IsTupleGet : FlagType<0x200000000>; +def IsTupleSet : FlagType<0x400000000>; +def ReverseMergeAnyBinOp : FlagType<0x800000000>; // e.g. Implement SUBR_X using SUB_X. +def ReverseMergeAnyAccOp : FlagType<0x1000000000>; // e.g. Implement MSB_X using MLS_X. +def IsStreaming : FlagType<0x2000000000>; +def IsStreamingCompatible : FlagType<0x4000000000>; +def IsSharedZA : FlagType<0x8000000000>; +def IsPreservesZA : FlagType<0x10000000000>; + +// These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h +class ImmCheckType { + int Value = val; +} +def ImmCheck0_31 : ImmCheckType<0>; // 0..31 (used for e.g. predicate patterns) +def ImmCheck1_16 : ImmCheckType<1>; // 1..16 +def ImmCheckExtract : ImmCheckType<2>; // 0..(2048/sizeinbits(elt) - 1) +def ImmCheckShiftRight : ImmCheckType<3>; // 1..sizeinbits(elt) +def ImmCheckShiftRightNarrow : ImmCheckType<4>; // 1..sizeinbits(elt)/2 +def ImmCheckShiftLeft : ImmCheckType<5>; // 0..(sizeinbits(elt) - 1) +def ImmCheck0_7 : ImmCheckType<6>; // 0..7 +def ImmCheckLaneIndex : ImmCheckType<7>; // 0..(128/(1*sizeinbits(elt)) - 1) +def ImmCheckLaneIndexCompRotate : ImmCheckType<8>; // 0..(128/(2*sizeinbits(elt)) - 1) +def ImmCheckLaneIndexDot : ImmCheckType<9>; // 0..(128/(4*sizeinbits(elt)) - 1) +def ImmCheckComplexRot90_270 : ImmCheckType<10>; // [90,270] +def ImmCheckComplexRotAll90 : ImmCheckType<11>; // [0, 90, 180,270] +def ImmCheck0_13 : ImmCheckType<12>; // 0..13 +def ImmCheck0_1 : ImmCheckType<13>; // 0..1 +def ImmCheck0_2 : ImmCheckType<14>; // 0..2 +def ImmCheck0_3 : ImmCheckType<15>; // 0..3 +def ImmCheck0_0 : ImmCheckType<16>; // 0..0 +def ImmCheck0_15 : ImmCheckType<17>; // 0..15 + +class ImmCheck { + int Arg = arg; + int EltSizeArg = eltSizeArg; + ImmCheckType Kind = kind; +} + +class Inst ft, list ch, MemEltType met> { + string Name = n; + string Prototype = p; + string Types = t; + string TargetGuard = "sve"; + int Merge = mt.Value; + string MergeSuffix = mt.Suffix; + string LLVMIntrinsic = i; + list Flags = ft; + list ImmChecks = ch; + int MemEltType = met.Value; +} + +// SInst: Instruction with signed/unsigned suffix (e.g., "s8", "u8") +class SInst ft = [], list ch = []> + : Inst { +} + +// MInst: Instructions which access memory +class MInst f, + MemEltType met = MemEltTyDefault, string i = "", + list ch = []> + : Inst { +} diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -37,6 +37,12 @@ {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, #include "clang/Basic/BuiltinsSVE.def" +#define BUILTIN(ID, TYPE, ATTRS) \ + {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, +#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ + {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, +#include "clang/Basic/BuiltinsSME.def" + #define BUILTIN(ID, TYPE, ATTRS) \ {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, #define LANGBUILTIN(ID, TYPE, ATTRS, LANG) \ @@ -772,16 +778,19 @@ if (Feature == "+sme") { HasSME = true; HasBFloat16 = true; + HasFullFP16 = true; } if (Feature == "+sme-f64f64") { HasSME = true; HasSMEF64F64 = true; HasBFloat16 = true; + HasFullFP16 = true; } if (Feature == "+sme-i16i64") { HasSME = true; HasSMEI16I64 = true; HasBFloat16 = true; + HasFullFP16 = true; } if (Feature == "+sb") HasSB = true; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -6759,11 +6759,29 @@ #undef SVEMAP1 #undef SVEMAP2 +#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \ + { \ + #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \ + TypeModifier \ + } + +#define SMEMAP2(NameBase, TypeModifier) \ + { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier } +static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = { +#define GET_SME_LLVM_INTRINSIC_MAP +#include "clang/Basic/arm_sme_builtin_cg.inc" +#undef GET_SME_LLVM_INTRINSIC_MAP +}; + +#undef SMEMAP1 +#undef SMEMAP2 + static bool NEONSIMDIntrinsicsProvenSorted = false; static bool AArch64SIMDIntrinsicsProvenSorted = false; static bool AArch64SISDIntrinsicsProvenSorted = false; static bool AArch64SVEIntrinsicsProvenSorted = false; +static bool AArch64SMEIntrinsicsProvenSorted = false; static const ARMVectorIntrinsicInfo * findARMVectorIntrinsicInMap(ArrayRef IntrinsicMap, @@ -8912,6 +8930,8 @@ return Builder.getInt32Ty(); case SVETypeFlags::EltTyInt64: return Builder.getInt64Ty(); + case SVETypeFlags::EltTyInt128: + return Builder.getInt128Ty(); case SVETypeFlags::EltTyFloat16: return Builder.getHalfTy(); @@ -9030,6 +9050,7 @@ switch (VTy->getMinNumElements()) { default: llvm_unreachable("unsupported element count!"); + case 1: case 2: case 4: case 8: @@ -9391,6 +9412,41 @@ return Store; } +Value *CodeGenFunction::EmitTileslice(Value *Offset, Value *Base) { + llvm::Value *CastOffset = Builder.CreateIntCast(Offset, Int32Ty, false); + return Builder.CreateAdd(Base, CastOffset, "tileslice"); +} + +Value *CodeGenFunction::EmitSMELd1St1(SVETypeFlags TypeFlags, + SmallVectorImpl &Ops, + unsigned IntID) { + Ops[3] = EmitSVEPredicateCast( + Ops[3], getSVEVectorForElementType(SVEBuiltinMemEltTy(TypeFlags))); + + SmallVector NewOps; + NewOps.push_back(Ops[3]); + + llvm::Value *BasePtr = Ops[4]; + + // If the intrinsic contains the vnum parameter, multiply it with the vector + // size in bytes. + if (Ops.size() == 6) { + Function *StreamingVectorLength = + CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb); + llvm::Value *StreamingVectorLengthCall = + Builder.CreateCall(StreamingVectorLength); + llvm::Value *Mulvl = + Builder.CreateMul(StreamingVectorLengthCall, Ops[5], "mulvl"); + // The type of the ptr parameter is void *, so use Int8Ty here. + BasePtr = Builder.CreateGEP(Int8Ty, Ops[4], Mulvl); + } + NewOps.push_back(BasePtr); + NewOps.push_back(Ops[0]); + NewOps.push_back(EmitTileslice(Ops[2], Ops[1])); + Function *F = CGM.getIntrinsic(IntID); + return Builder.CreateCall(F, NewOps); +} + // Limit the usage of scalable llvm IR generated by the ACLE by using the // sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat. Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) { @@ -9817,6 +9873,43 @@ return nullptr; } +Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, + const CallExpr *E) { + // Find out if any arguments are required to be integer constant expressions. + unsigned ICEArguments = 0; + ASTContext::GetBuiltinTypeError Error; + getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); + assert(Error == ASTContext::GE_None && "Should not codegen an error"); + + llvm::SmallVector Ops; + for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { + if ((ICEArguments & (1 << i)) == 0) + Ops.push_back(EmitScalarExpr(E->getArg(i))); + else { + // If this is required to be a constant, constant fold it so that we know + // that the generated intrinsic gets a ConstantInt. + std::optional Result = + E->getArg(i)->getIntegerConstantExpr(getContext()); + assert(Result && "Expected argument to be a constant"); + + // Immediates for SVE llvm intrinsics are always 32bit. We can safely + // truncate because the immediate has been range checked and no valid + // immediate requires more than a handful of bits. + *Result = Result->extOrTrunc(32); + Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result)); + } + } + + auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID, + AArch64SMEIntrinsicsProvenSorted); + SVETypeFlags TypeFlags(Builtin->TypeModifier); + if (TypeFlags.isLoad() || TypeFlags.isStore()) + return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic); + + /// Should not happen + return nullptr; +} + Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::Triple::ArchType Arch) { @@ -9824,6 +9917,10 @@ BuiltinID <= clang::AArch64::LastSVEBuiltin) return EmitAArch64SVEBuiltinExpr(BuiltinID, E); + if (BuiltinID >= clang::AArch64::FirstSMEBuiltin && + BuiltinID <= clang::AArch64::LastSMEBuiltin) + return EmitAArch64SMEBuiltinExpr(BuiltinID, E); + unsigned HintID = static_cast(-1); switch (BuiltinID) { default: break; diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4245,6 +4245,7 @@ llvm::Value *EmitSVEMaskedStore(const CallExpr *, SmallVectorImpl &Ops, unsigned BuiltinID); + llvm::Value *EmitTileslice(llvm::Value *Offset, llvm::Value *Base); llvm::Value *EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned BuiltinID); @@ -4259,6 +4260,11 @@ unsigned IntID); llvm::Value *EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E); + llvm::Value *EmitSMELd1St1(SVETypeFlags TypeFlags, + llvm::SmallVectorImpl &Ops, + unsigned IntID); + llvm::Value *EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, const CallExpr *E); + llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::Triple::ArchType Arch); llvm::Value *EmitBPFBuiltinExpr(unsigned BuiltinID, const CallExpr *E); diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -340,6 +340,8 @@ clang_generate_header(-gen-arm-fp16 arm_fp16.td arm_fp16.h) # Generate arm_sve.h clang_generate_header(-gen-arm-sve-header arm_sve.td arm_sve.h) + # Generate arm_sme_draft_spec_subject_to_change.h + clang_generate_header(-gen-arm-sme-header arm_sme.td arm_sme_draft_spec_subject_to_change.h) # Generate arm_bf16.h clang_generate_header(-gen-arm-bf16 arm_bf16.td arm_bf16.h) # Generate arm_mve.h @@ -360,6 +362,7 @@ list(APPEND aarch64_only_generated_files "${CMAKE_CURRENT_BINARY_DIR}/arm_sve.h" + "${CMAKE_CURRENT_BINARY_DIR}/arm_sme_draft_spec_subject_to_change.h" "${CMAKE_CURRENT_BINARY_DIR}/arm_bf16.h" ) endif() diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2879,6 +2879,9 @@ #define GET_SVE_IMMEDIATE_CHECK #include "clang/Basic/arm_sve_sema_rangechecks.inc" #undef GET_SVE_IMMEDIATE_CHECK +#define GET_SME_IMMEDIATE_CHECK +#include "clang/Basic/arm_sme_sema_rangechecks.inc" +#undef GET_SME_IMMEDIATE_CHECK } // Perform all the immediate checks for this builtin call. @@ -2984,6 +2987,14 @@ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 3)) HasError = true; break; + case SVETypeFlags::ImmCheck0_0: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 0)) + HasError = true; + break; + case SVETypeFlags::ImmCheck0_15: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 15)) + HasError = true; + break; } } diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -5772,6 +5772,14 @@ BuiltinID <= AArch64::LastSVEBuiltin; } +static bool ArmSmeAliasValid(ASTContext &Context, unsigned BuiltinID, + StringRef AliasName) { + if (Context.BuiltinInfo.isAuxBuiltinID(BuiltinID)) + BuiltinID = Context.BuiltinInfo.getAuxBuiltinID(BuiltinID); + return BuiltinID >= AArch64::FirstSMEBuiltin && + BuiltinID <= AArch64::LastSMEBuiltin; +} + static void handleArmBuiltinAliasAttr(Sema &S, Decl *D, const ParsedAttr &AL) { if (!AL.isArgIdent(0)) { S.Diag(AL.getLoc(), diag::err_attribute_argument_n_type) @@ -5784,7 +5792,8 @@ StringRef AliasName = cast(D)->getIdentifier()->getName(); bool IsAArch64 = S.Context.getTargetInfo().getTriple().isAArch64(); - if ((IsAArch64 && !ArmSveAliasValid(S.Context, BuiltinID, AliasName)) || + if ((IsAArch64 && !ArmSveAliasValid(S.Context, BuiltinID, AliasName) && + !ArmSmeAliasValid(S.Context, BuiltinID, AliasName)) || (!IsAArch64 && !ArmMveAliasValid(BuiltinID, AliasName) && !ArmCdeAliasValid(BuiltinID, AliasName))) { S.Diag(AL.getLoc(), diag::err_attribute_arm_builtin_alias); diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c @@ -0,0 +1,148 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s + +#include + +#ifdef DISABLE_SME_ATTRIBUTES +#define ARM_STREAMING_ATTR +#else +#define ARM_STREAMING_ATTR __attribute__((arm_streaming)) +#endif + +// CHECK-C-LABEL: @test_svld1_hor_za8( +// CHECK-CXX-LABEL: @_Z18test_svld1_hor_za8ju10__SVBool_tPKv( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz( [[PG:%.*]], [[PTRTY:ptr|i8\*]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 15 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz( [[PG]], [[PTRTY]] [[PTR]], i32 0, i32 [[TILESLICE1]]) +// CHECK-NEXT: ret void +// +ARM_STREAMING_ATTR void test_svld1_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr) { + svld1_hor_za8(0, slice_base, 0, pg, ptr); + svld1_hor_za8(0, slice_base, 15, pg, ptr); +} + +// CHECK-C-LABEL: @test_svld1_hor_za16( +// CHECK-CXX-LABEL: @_Z19test_svld1_hor_za16ju10__SVBool_tPKv( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz( [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz( [[TMP0]], [[PTRTY]] [[PTR]], i32 1, i32 [[TILESLICE1]]) +// CHECK-NEXT: ret void +// +ARM_STREAMING_ATTR void test_svld1_hor_za16(uint32_t slice_base, svbool_t pg, const void *ptr) { + svld1_hor_za16(0, slice_base, 0, pg, ptr); + svld1_hor_za16(1, slice_base, 7, pg, ptr); +} + +// CHECK-C-LABEL: @test_svld1_hor_za32( +// CHECK-CXX-LABEL: @_Z19test_svld1_hor_za32ju10__SVBool_tPKv( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz( [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 3 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz( [[TMP0]], [[PTRTY]] [[PTR]], i32 3, i32 [[TILESLICE1]]) +// CHECK-NEXT: ret void +// +ARM_STREAMING_ATTR void test_svld1_hor_za32(uint32_t slice_base, svbool_t pg, const void *ptr) { + svld1_hor_za32(0, slice_base, 0, pg, ptr); + svld1_hor_za32(3, slice_base, 3, pg, ptr); +} + +// CHECK-C-LABEL: @test_svld1_hor_za64( +// CHECK-CXX-LABEL: @_Z19test_svld1_hor_za64ju10__SVBool_tPKv( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz( [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 1 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz( [[TMP0]], [[PTRTY]] [[PTR]], i32 7, i32 [[TILESLICE1]]) +// CHECK-NEXT: ret void +// +ARM_STREAMING_ATTR void test_svld1_hor_za64(uint32_t slice_base, svbool_t pg, const void *ptr) { + svld1_hor_za64(0, slice_base, 0, pg, ptr); + svld1_hor_za64(7, slice_base, 1, pg, ptr); +} + +// CHECK-C-LABEL: @test_svld1_hor_za128( +// CHECK-CXX-LABEL: @_Z20test_svld1_hor_za128ju10__SVBool_tPKv( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz( [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz( [[TMP0]], [[PTRTY]] [[PTR]], i32 15, i32 [[SLICE_BASE]]) +// CHECK-NEXT: ret void +// +ARM_STREAMING_ATTR void test_svld1_hor_za128(uint32_t slice_base, svbool_t pg, const void *ptr) { + svld1_hor_za128(0, slice_base, 0, pg, ptr); + svld1_hor_za128(15, slice_base, 0, pg, ptr); +} + +// CHECK-C-LABEL: @test_svld1_ver_za8( +// CHECK-CXX-LABEL: @_Z18test_svld1_ver_za8ju10__SVBool_tPKv( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert( [[PG:%.*]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 15 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert( [[PG]], [[PTRTY]] [[PTR]], i32 0, i32 [[TILESLICE1]]) +// CHECK-NEXT: ret void +// +ARM_STREAMING_ATTR void test_svld1_ver_za8(uint32_t slice_base, svbool_t pg, const void *ptr) { + svld1_ver_za8(0, slice_base, 0, pg, ptr); + svld1_ver_za8(0, slice_base, 15, pg, ptr); +} + +// CHECK-C-LABEL: @test_svld1_ver_za16( +// CHECK-CXX-LABEL: @_Z19test_svld1_ver_za16ju10__SVBool_tPKv( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert( [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert( [[TMP0]], [[PTRTY]] [[PTR]], i32 1, i32 [[TILESLICE1]]) +// CHECK-NEXT: ret void +// +ARM_STREAMING_ATTR void test_svld1_ver_za16(uint32_t slice_base, svbool_t pg, const void *ptr) { + svld1_ver_za16(0, slice_base, 0, pg, ptr); + svld1_ver_za16(1, slice_base, 7, pg, ptr); +} + +// CHECK-C-LABEL: @test_svld1_ver_za32( +// CHECK-CXX-LABEL: @_Z19test_svld1_ver_za32ju10__SVBool_tPKv( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert( [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 3 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert( [[TMP0]], [[PTRTY]] [[PTR]], i32 3, i32 [[TILESLICE1]]) +// CHECK-NEXT: ret void +// +ARM_STREAMING_ATTR void test_svld1_ver_za32(uint32_t slice_base, svbool_t pg, const void *ptr) { + svld1_ver_za32(0, slice_base, 0, pg, ptr); + svld1_ver_za32(3, slice_base, 3, pg, ptr); +} + +// CHECK-C-LABEL: @test_svld1_ver_za64( +// CHECK-CXX-LABEL: @_Z19test_svld1_ver_za64ju10__SVBool_tPKv( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert( [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 1 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert( [[TMP0]], [[PTRTY]] [[PTR]], i32 7, i32 [[TILESLICE1]]) +// CHECK-NEXT: ret void +// +ARM_STREAMING_ATTR void test_svld1_ver_za64(uint32_t slice_base, svbool_t pg, const void *ptr) { + svld1_ver_za64(0, slice_base, 0, pg, ptr); + svld1_ver_za64(7, slice_base, 1, pg, ptr); +} + +// CHECK-C-LABEL: @test_svld1_ver_za128( +// CHECK-CXX-LABEL: @_Z20test_svld1_ver_za128ju10__SVBool_tPKv( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert( [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert( [[TMP0]], [[PTRTY]] [[PTR]], i32 15, i32 [[SLICE_BASE]]) +// CHECK-NEXT: ret void +// +ARM_STREAMING_ATTR void test_svld1_ver_za128(uint32_t slice_base, svbool_t pg, const void *ptr) { + svld1_ver_za128(0, slice_base, 0, pg, ptr); + svld1_ver_za128(15, slice_base, 0, pg, ptr); +} diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c @@ -0,0 +1,178 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s + +#include + +#ifdef DISABLE_SME_ATTRIBUTES +#define ARM_STREAMING_ATTR +#else +#define ARM_STREAMING_ATTR __attribute__((arm_streaming)) +#endif + +// CHECK-C-LABEL: @test_svld1_hor_vnum_za8( +// CHECK-CXX-LABEL: @_Z23test_svld1_hor_vnum_za8ju10__SVBool_tPKvl( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP0]], [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, [[PTRTY:ptr|i8\*]] [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz( [[PG:%.*]], [[PTRTY]] [[TMP1]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 15 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz( [[PG]], [[PTRTY]] [[TMP1]], i32 0, i32 [[TILESLICE2]]) +// CHECK-NEXT: ret void +// +ARM_STREAMING_ATTR void test_svld1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) { + svld1_hor_vnum_za8(0, slice_base, 0, pg, ptr, vnum); + svld1_hor_vnum_za8(0, slice_base, 15, pg, ptr, vnum); +} + +// CHECK-C-LABEL: @test_svld1_hor_vnum_za16( +// CHECK-CXX-LABEL: @_Z24test_svld1_hor_vnum_za16ju10__SVBool_tPKvl( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz( [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz( [[TMP0]], [[PTRTY]] [[TMP2]], i32 1, i32 [[TILESLICE2]]) +// CHECK-NEXT: ret void +// +ARM_STREAMING_ATTR void test_svld1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) { + svld1_hor_vnum_za16(0, slice_base, 0, pg, ptr, vnum); + svld1_hor_vnum_za16(1, slice_base, 7, pg, ptr, vnum); +} + +// CHECK-C-LABEL: @test_svld1_hor_vnum_za32( +// CHECK-CXX-LABEL: @_Z24test_svld1_hor_vnum_za32ju10__SVBool_tPKvl( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz( [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 3 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz( [[TMP0]], [[PTRTY]] [[TMP2]], i32 3, i32 [[TILESLICE2]]) +// CHECK-NEXT: ret void +// +ARM_STREAMING_ATTR void test_svld1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) { + svld1_hor_vnum_za32(0, slice_base, 0, pg, ptr, vnum); + svld1_hor_vnum_za32(3, slice_base, 3, pg, ptr, vnum); +} + +// CHECK-C-LABEL: @test_svld1_hor_vnum_za64( +// CHECK-CXX-LABEL: @_Z24test_svld1_hor_vnum_za64ju10__SVBool_tPKvl( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz( [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 1 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz( [[TMP0]], [[PTRTY]] [[TMP2]], i32 7, i32 [[TILESLICE2]]) +// CHECK-NEXT: ret void +// +ARM_STREAMING_ATTR void test_svld1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) { + svld1_hor_vnum_za64(0, slice_base, 0, pg, ptr, vnum); + svld1_hor_vnum_za64(7, slice_base, 1, pg, ptr, vnum); +} + +// CHECK-C-LABEL: @test_svld1_hor_vnum_za128( +// CHECK-CXX-LABEL: @_Z25test_svld1_hor_vnum_za128ju10__SVBool_tPKvl( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz( [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz( [[TMP0]], [[PTRTY]] [[TMP2]], i32 15, i32 [[SLICE_BASE]]) +// CHECK-NEXT: ret void +// +ARM_STREAMING_ATTR void test_svld1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) { + svld1_hor_vnum_za128(0, slice_base, 0, pg, ptr, vnum); + svld1_hor_vnum_za128(15, slice_base, 0, pg, ptr, vnum); +} + +// CHECK-C-LABEL: @test_svld1_ver_hor_za8( +// CHECK-CXX-LABEL: @_Z22test_svld1_ver_hor_za8ju10__SVBool_tPKvl( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP0]], [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert( [[PG:%.*]], [[PTRTY]] [[TMP1]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 15 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert( [[PG]], [[PTRTY]] [[TMP1]], i32 0, i32 [[TILESLICE2]]) +// CHECK-NEXT: ret void +// +ARM_STREAMING_ATTR void test_svld1_ver_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) { + svld1_ver_vnum_za8(0, slice_base, 0, pg, ptr, vnum); + svld1_ver_vnum_za8(0, slice_base, 15, pg, ptr, vnum); +} + +// CHECK-C-LABEL: @test_svld1_ver_vnum_za16( +// CHECK-CXX-LABEL: @_Z24test_svld1_ver_vnum_za16ju10__SVBool_tPKvl( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert( [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert( [[TMP0]], [[PTRTY]] [[TMP2]], i32 1, i32 [[TILESLICE2]]) +// CHECK-NEXT: ret void +// +ARM_STREAMING_ATTR void test_svld1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) { + svld1_ver_vnum_za16(0, slice_base, 0, pg, ptr, vnum); + svld1_ver_vnum_za16(1, slice_base, 7, pg, ptr, vnum); +} + +// CHECK-C-LABEL: @test_svld1_ver_vnum_za32( +// CHECK-CXX-LABEL: @_Z24test_svld1_ver_vnum_za32ju10__SVBool_tPKvl( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert( [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 3 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert( [[TMP0]], [[PTRTY]] [[TMP2]], i32 3, i32 [[TILESLICE2]]) +// CHECK-NEXT: ret void +// +ARM_STREAMING_ATTR void test_svld1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) { + svld1_ver_vnum_za32(0, slice_base, 0, pg, ptr, vnum); + svld1_ver_vnum_za32(3, slice_base, 3, pg, ptr, vnum); +} + +// CHECK-C-LABEL: @test_svld1_ver_vnum_za64( +// CHECK-CXX-LABEL: @_Z24test_svld1_ver_vnum_za64ju10__SVBool_tPKvl( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert( [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 1 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert( [[TMP0]], [[PTRTY]] [[TMP2]], i32 7, i32 [[TILESLICE2]]) +// CHECK-NEXT: ret void +// +ARM_STREAMING_ATTR void test_svld1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) { + svld1_ver_vnum_za64(0, slice_base, 0, pg, ptr, vnum); + svld1_ver_vnum_za64(7, slice_base, 1, pg, ptr, vnum); +} + +// CHECK-C-LABEL: @test_svld1_ver_vnum_za128( +// CHECK-CXX-LABEL: @_Z25test_svld1_ver_vnum_za128ju10__SVBool_tPKvl( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert( [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert( [[TMP0]], [[PTRTY]] [[TMP2]], i32 15, i32 [[SLICE_BASE]]) +// CHECK-NEXT: ret void +// +ARM_STREAMING_ATTR void test_svld1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) { + svld1_ver_vnum_za128(0, slice_base, 0, pg, ptr, vnum); + svld1_ver_vnum_za128(15, slice_base, 0, pg, ptr, vnum); +} diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1.c @@ -0,0 +1,148 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s + +#include + +#ifdef DISABLE_SME_ATTRIBUTES +#define ARM_STREAMING_ATTR +#else +#define ARM_STREAMING_ATTR __attribute__((arm_streaming)) +#endif + +// CHECK-C-LABEL: @test_svst1_hor_za8( +// CHECK-CXX-LABEL: @_Z18test_svst1_hor_za8ju10__SVBool_tPv( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz( [[PG:%.*]], [[PTRTY:ptr|i8\*]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 15 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz( [[PG]], [[PTRTY]] [[PTR]], i32 0, i32 [[TILESLICE1]]) +// CHECK-NEXT: ret void +// +ARM_STREAMING_ATTR void test_svst1_hor_za8(uint32_t slice_base, svbool_t pg, void *ptr) { + svst1_hor_za8(0, slice_base, 0, pg, ptr); + svst1_hor_za8(0, slice_base, 15, pg, ptr); +} + +// CHECK-C-LABEL: @test_svst1_hor_za16( +// CHECK-CXX-LABEL: @_Z19test_svst1_hor_za16ju10__SVBool_tPv( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz( [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz( [[TMP0]], [[PTRTY]] [[PTR]], i32 1, i32 [[TILESLICE1]]) +// CHECK-NEXT: ret void +// +ARM_STREAMING_ATTR void test_svst1_hor_za16(uint32_t slice_base, svbool_t pg, void *ptr) { + svst1_hor_za16(0, slice_base, 0, pg, ptr); + svst1_hor_za16(1, slice_base, 7, pg, ptr); +} + +// CHECK-C-LABEL: @test_svst1_hor_za32( +// CHECK-CXX-LABEL: @_Z19test_svst1_hor_za32ju10__SVBool_tPv( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz( [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 3 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz( [[TMP0]], [[PTRTY]] [[PTR]], i32 3, i32 [[TILESLICE1]]) +// CHECK-NEXT: ret void +// +ARM_STREAMING_ATTR void test_svst1_hor_za32(uint32_t slice_base, svbool_t pg, void *ptr) { + svst1_hor_za32(0, slice_base, 0, pg, ptr); + svst1_hor_za32(3, slice_base, 3, pg, ptr); +} + +// CHECK-C-LABEL: @test_svst1_hor_za64( +// CHECK-CXX-LABEL: @_Z19test_svst1_hor_za64ju10__SVBool_tPv( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz( [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 1 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz( [[TMP0]], [[PTRTY]] [[PTR]], i32 7, i32 [[TILESLICE1]]) +// CHECK-NEXT: ret void +// +ARM_STREAMING_ATTR void test_svst1_hor_za64(uint32_t slice_base, svbool_t pg, void *ptr) { + svst1_hor_za64(0, slice_base, 0, pg, ptr); + svst1_hor_za64(7, slice_base, 1, pg, ptr); +} + +// CHECK-C-LABEL: @test_svst1_hor_za128( +// CHECK-CXX-LABEL: @_Z20test_svst1_hor_za128ju10__SVBool_tPv( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.horiz( [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.horiz( [[TMP0]], [[PTRTY]] [[PTR]], i32 15, i32 [[SLICE_BASE]]) +// CHECK-NEXT: ret void +// +ARM_STREAMING_ATTR void test_svst1_hor_za128(uint32_t slice_base, svbool_t pg, void *ptr) { + svst1_hor_za128(0, slice_base, 0, pg, ptr); + svst1_hor_za128(15, slice_base, 0, pg, ptr); +} + +// CHECK-C-LABEL: @test_svst1_ver_za8( +// CHECK-CXX-LABEL: @_Z18test_svst1_ver_za8ju10__SVBool_tPv( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.vert( [[PG:%.*]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 15 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.vert( [[PG]], [[PTRTY]] [[PTR]], i32 0, i32 [[TILESLICE1]]) +// CHECK-NEXT: ret void +// +ARM_STREAMING_ATTR void test_svst1_ver_za8(uint32_t slice_base, svbool_t pg, void *ptr) { + svst1_ver_za8(0, slice_base, 0, pg, ptr); + svst1_ver_za8(0, slice_base, 15, pg, ptr); +} + +// CHECK-C-LABEL: @test_svst1_ver_za16( +// CHECK-CXX-LABEL: @_Z19test_svst1_ver_za16ju10__SVBool_tPv( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.vert( [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.vert( [[TMP0]], [[PTRTY]] [[PTR]], i32 1, i32 [[TILESLICE1]]) +// CHECK-NEXT: ret void +// +ARM_STREAMING_ATTR void test_svst1_ver_za16(uint32_t slice_base, svbool_t pg, void *ptr) { + svst1_ver_za16(0, slice_base, 0, pg, ptr); + svst1_ver_za16(1, slice_base, 7, pg, ptr); +} + +// CHECK-C-LABEL: @test_svst1_ver_za32( +// CHECK-CXX-LABEL: @_Z19test_svst1_ver_za32ju10__SVBool_tPv( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.vert( [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 3 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.vert( [[TMP0]], [[PTRTY]] [[PTR]], i32 3, i32 [[TILESLICE1]]) +// CHECK-NEXT: ret void +// +ARM_STREAMING_ATTR void test_svst1_ver_za32(uint32_t slice_base, svbool_t pg, void *ptr) { + svst1_ver_za32(0, slice_base, 0, pg, ptr); + svst1_ver_za32(3, slice_base, 3, pg, ptr); +} + +// CHECK-C-LABEL: @test_svst1_ver_za64( +// CHECK-CXX-LABEL: @_Z19test_svst1_ver_za64ju10__SVBool_tPv( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.vert( [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 1 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.vert( [[TMP0]], [[PTRTY]] [[PTR]], i32 7, i32 [[TILESLICE1]]) +// CHECK-NEXT: ret void +// +ARM_STREAMING_ATTR void test_svst1_ver_za64(uint32_t slice_base, svbool_t pg, void *ptr) { + svst1_ver_za64(0, slice_base, 0, pg, ptr); + svst1_ver_za64(7, slice_base, 1, pg, ptr); +} + +// CHECK-C-LABEL: @test_svst1_ver_za128( +// CHECK-CXX-LABEL: @_Z20test_svst1_ver_za128ju10__SVBool_tPv( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.vert( [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.vert( [[TMP0]], [[PTRTY]] [[PTR]], i32 15, i32 [[SLICE_BASE]]) +// CHECK-NEXT: ret void +// +ARM_STREAMING_ATTR void test_svst1_ver_za128(uint32_t slice_base, svbool_t pg, void *ptr) { + svst1_ver_za128(0, slice_base, 0, pg, ptr); + svst1_ver_za128(15, slice_base, 0, pg, ptr); +} diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1_vnum.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1_vnum.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1_vnum.c @@ -0,0 +1,178 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s + +#include + +#ifdef DISABLE_SME_ATTRIBUTES +#define ARM_STREAMING_ATTR +#else +#define ARM_STREAMING_ATTR __attribute__((arm_streaming)) +#endif + +// CHECK-C-LABEL: @test_svst1_hor_vnum_za8( +// CHECK-CXX-LABEL: @_Z23test_svst1_hor_vnum_za8ju10__SVBool_tPvl( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP0]], [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, [[PTRTY:ptr|i8\*]] [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz( [[PG:%.*]], [[PTRTY]] [[TMP1]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 15 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz( [[PG]], [[PTRTY]] [[TMP1]], i32 0, i32 [[TILESLICE2]]) +// CHECK-NEXT: ret void +// +ARM_STREAMING_ATTR void test_svst1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) { + svst1_hor_vnum_za8(0, slice_base, 0, pg, ptr, vnum); + svst1_hor_vnum_za8(0, slice_base, 15, pg, ptr, vnum); +} + +// CHECK-C-LABEL: @test_svst1_hor_vnum_za16( +// CHECK-CXX-LABEL: @_Z24test_svst1_hor_vnum_za16ju10__SVBool_tPvl( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz( [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz( [[TMP0]], [[PTRTY]] [[TMP2]], i32 1, i32 [[TILESLICE2]]) +// CHECK-NEXT: ret void +// +ARM_STREAMING_ATTR void test_svst1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) { + svst1_hor_vnum_za16(0, slice_base, 0, pg, ptr, vnum); + svst1_hor_vnum_za16(1, slice_base, 7, pg, ptr, vnum); +} + +// CHECK-C-LABEL: @test_svst1_hor_vnum_za32( +// CHECK-CXX-LABEL: @_Z24test_svst1_hor_vnum_za32ju10__SVBool_tPvl( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz( [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 3 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz( [[TMP0]], [[PTRTY]] [[TMP2]], i32 3, i32 [[TILESLICE2]]) +// CHECK-NEXT: ret void +// +ARM_STREAMING_ATTR void test_svst1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) { + svst1_hor_vnum_za32(0, slice_base, 0, pg, ptr, vnum); + svst1_hor_vnum_za32(3, slice_base, 3, pg, ptr, vnum); +} + +// CHECK-C-LABEL: @test_svst1_hor_vnum_za64( +// CHECK-CXX-LABEL: @_Z24test_svst1_hor_vnum_za64ju10__SVBool_tPvl( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz( [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 1 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz( [[TMP0]], [[PTRTY]] [[TMP2]], i32 7, i32 [[TILESLICE2]]) +// CHECK-NEXT: ret void +// +ARM_STREAMING_ATTR void test_svst1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) { + svst1_hor_vnum_za64(0, slice_base, 0, pg, ptr, vnum); + svst1_hor_vnum_za64(7, slice_base, 1, pg, ptr, vnum); +} + +// CHECK-C-LABEL: @test_svst1_hor_vnum_za128( +// CHECK-CXX-LABEL: @_Z25test_svst1_hor_vnum_za128ju10__SVBool_tPvl( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.horiz( [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.horiz( [[TMP0]], [[PTRTY]] [[TMP2]], i32 15, i32 [[SLICE_BASE]]) +// CHECK-NEXT: ret void +// +ARM_STREAMING_ATTR void test_svst1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) { + svst1_hor_vnum_za128(0, slice_base, 0, pg, ptr, vnum); + svst1_hor_vnum_za128(15, slice_base, 0, pg, ptr, vnum); +} + +// CHECK-C-LABEL: @test_svst1_ver_vnum_za8( +// CHECK-CXX-LABEL: @_Z23test_svst1_ver_vnum_za8ju10__SVBool_tPvl( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP0]], [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.vert( [[PG:%.*]], [[PTRTY]] [[TMP1]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 15 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.vert( [[PG]], [[PTRTY]] [[TMP1]], i32 0, i32 [[TILESLICE2]]) +// CHECK-NEXT: ret void +// +ARM_STREAMING_ATTR void test_svst1_ver_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) { + svst1_ver_vnum_za8(0, slice_base, 0, pg, ptr, vnum); + svst1_ver_vnum_za8(0, slice_base, 15, pg, ptr, vnum); +} + +// CHECK-C-LABEL: @test_svst1_ver_vnum_za16( +// CHECK-CXX-LABEL: @_Z24test_svst1_ver_vnum_za16ju10__SVBool_tPvl( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.vert( [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.vert( [[TMP0]], [[PTRTY]] [[TMP2]], i32 1, i32 [[TILESLICE2]]) +// CHECK-NEXT: ret void +// +ARM_STREAMING_ATTR void test_svst1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) { + svst1_ver_vnum_za16(0, slice_base, 0, pg, ptr, vnum); + svst1_ver_vnum_za16(1, slice_base, 7, pg, ptr, vnum); +} + +// CHECK-C-LABEL: @test_svst1_ver_vnum_za32( +// CHECK-CXX-LABEL: @_Z24test_svst1_ver_vnum_za32ju10__SVBool_tPvl( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.vert( [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 3 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.vert( [[TMP0]], [[PTRTY]] [[TMP2]], i32 3, i32 [[TILESLICE2]]) +// CHECK-NEXT: ret void +// +ARM_STREAMING_ATTR void test_svst1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) { + svst1_ver_vnum_za32(0, slice_base, 0, pg, ptr, vnum); + svst1_ver_vnum_za32(3, slice_base, 3, pg, ptr, vnum); +} + +// CHECK-C-LABEL: @test_svst1_ver_vnum_za64( +// CHECK-CXX-LABEL: @_Z24test_svst1_ver_vnum_za64ju10__SVBool_tPvl( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.vert( [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 1 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.vert( [[TMP0]], [[PTRTY]] [[TMP2]], i32 7, i32 [[TILESLICE2]]) +// CHECK-NEXT: ret void +// +ARM_STREAMING_ATTR void test_svst1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) { + svst1_ver_vnum_za64(0, slice_base, 0, pg, ptr, vnum); + svst1_ver_vnum_za64(7, slice_base, 1, pg, ptr, vnum); +} + +// CHECK-C-LABEL: @test_svst1_ver_vnum_za128( +// CHECK-CXX-LABEL: @_Z25test_svst1_ver_vnum_za128ju10__SVBool_tPvl( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]] +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.vert( [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.vert( [[TMP0]], [[PTRTY]] [[TMP2]], i32 15, i32 [[SLICE_BASE]]) +// CHECK-NEXT: ret void +// +ARM_STREAMING_ATTR void test_svst1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) { + svst1_ver_vnum_za128(0, slice_base, 0, pg, ptr, vnum); + svst1_ver_vnum_za128(15, slice_base, 0, pg, ptr, vnum); +} diff --git a/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp new file mode 100644 --- /dev/null +++ b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp @@ -0,0 +1,131 @@ +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fsyntax-only -verify -verify-ignore-unexpected=error %s +// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fsyntax-only -verify -verify-ignore-unexpected=error %s + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +#include + +#ifdef DISABLE_SME_ATTRIBUTES +#define ARM_STREAMING_ATTR +#else +#define ARM_STREAMING_ATTR __attribute__((arm_streaming)) +#endif + +ARM_STREAMING_ATTR +void test_range_0_0(svbool_t pg, void *ptr) { + // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 0]}} + SVE_ACLE_FUNC(svld1_hor_za8,,,)(-1, -1, 0, pg, ptr); + // expected-error@+1 {{argument value 1 is outside the valid range [0, 0]}} + SVE_ACLE_FUNC(svst1_ver_za8,,,)(1, -1, 15, pg, ptr); + // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 0]}} + SVE_ACLE_FUNC(svld1_hor_za128,,,)(0, -1, -1, pg, ptr); + // expected-error@+1 {{argument value 1 is outside the valid range [0, 0]}} + SVE_ACLE_FUNC(svst1_ver_za128,,,)(15, -1, 1, pg, ptr); + // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 0]}} + SVE_ACLE_FUNC(svld1_hor_vnum_za8,,,)(-1, -1, 0, pg, ptr, 1); + // expected-error@+1 {{argument value 1 is outside the valid range [0, 0]}} + SVE_ACLE_FUNC(svst1_ver_vnum_za8,,,)(1, -1, 15, pg, ptr, 1); + // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 0]}} + SVE_ACLE_FUNC(svld1_hor_vnum_za128,,,)(0, -1, -1, pg, ptr, 1); + // expected-error@+1 {{argument value 1 is outside the valid range [0, 0]}} + SVE_ACLE_FUNC(svst1_ver_vnum_za128,,,)(15, -1, 1, pg, ptr, 1); +} + +ARM_STREAMING_ATTR +void test_range_0_1(svbool_t pg, void *ptr) { + // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + SVE_ACLE_FUNC(svld1_hor_za16,,,)(-1, -1, 0, pg, ptr); + // expected-error@+1 {{argument value 2 is outside the valid range [0, 1]}} + SVE_ACLE_FUNC(svst1_ver_za16,,,)(2, -1, 7, pg, ptr); + // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + SVE_ACLE_FUNC(svld1_hor_za64,,,)(0, -1, -1, pg, ptr); + // expected-error@+1 {{argument value 2 is outside the valid range [0, 1]}} + SVE_ACLE_FUNC(svst1_ver_za64,,,)(7, -1, 2, pg, ptr); + // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + SVE_ACLE_FUNC(svld1_hor_vnum_za16,,,)(-1, -1, 0, pg, ptr, 1); + // expected-error@+1 {{argument value 2 is outside the valid range [0, 1]}} + SVE_ACLE_FUNC(svst1_ver_vnum_za16,,,)(2, -1, 7, pg, ptr, 1); + // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + SVE_ACLE_FUNC(svld1_hor_vnum_za64,,,)(0, -1, -1, pg, ptr, 1); + // expected-error@+1 {{argument value 2 is outside the valid range [0, 1]}} + SVE_ACLE_FUNC(svst1_ver_vnum_za64,,,)(7, -1, 2, pg, ptr, 1); +} + +ARM_STREAMING_ATTR +void test_range_0_3(svbool_t pg, void *ptr) { + // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + SVE_ACLE_FUNC(svld1_hor_za32,,,)(-1, -1, 0, pg, ptr); + // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + SVE_ACLE_FUNC(svst1_ver_za32,,,)(4, -1, 3, pg, ptr); + // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + SVE_ACLE_FUNC(svld1_hor_za32,,,)(0, -1, -1, pg, ptr); + // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + SVE_ACLE_FUNC(svst1_ver_za32,,,)(3, -1, 4, pg, ptr); + // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + SVE_ACLE_FUNC(svld1_hor_vnum_za32,,,)(-1, -1, 0, pg, ptr, 1); + // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + SVE_ACLE_FUNC(svst1_ver_vnum_za32,,,)(4, -1, 3, pg, ptr, 1); + // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + SVE_ACLE_FUNC(svld1_hor_vnum_za32,,,)(0, -1, -1, pg, ptr, 1); + // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + SVE_ACLE_FUNC(svst1_ver_vnum_za32,,,)(3, -1, 4, pg, ptr, 1); +} + +ARM_STREAMING_ATTR +void test_range_0_7(svbool_t pg, void *ptr) { + // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 7]}} + SVE_ACLE_FUNC(svld1_hor_za64,,,)(-1, -1, 0, pg, ptr); + // expected-error@+1 {{argument value 8 is outside the valid range [0, 7]}} + SVE_ACLE_FUNC(svst1_ver_za64,,,)(8, -1, 1, pg, ptr); + // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 7]}} + SVE_ACLE_FUNC(svld1_hor_za16,,,)(0, -1, -1, pg, ptr); + // expected-error@+1 {{argument value 8 is outside the valid range [0, 7]}} + SVE_ACLE_FUNC(svst1_ver_za16,,,)(1, -1, 8, pg, ptr); + // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 7]}} + SVE_ACLE_FUNC(svld1_hor_vnum_za64,,,)(-1, -1, 0, pg, ptr, 1); + // expected-error@+1 {{argument value 8 is outside the valid range [0, 7]}} + SVE_ACLE_FUNC(svst1_ver_vnum_za64,,,)(8, -1, 1, pg, ptr, 1); + // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 7]}} + SVE_ACLE_FUNC(svld1_hor_vnum_za16,,,)(0, -1, -1, pg, ptr, 1); + // expected-error@+1 {{argument value 8 is outside the valid range [0, 7]}} + SVE_ACLE_FUNC(svst1_ver_vnum_za16,,,)(1, -1, 8, pg, ptr, 1); +} + +ARM_STREAMING_ATTR +void test_range_0_15(svbool_t pg, void *ptr) { + // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 15]}} + SVE_ACLE_FUNC(svld1_hor_za128,,,)(-1, -1, 0, pg, ptr); + // expected-error@+1 {{argument value 16 is outside the valid range [0, 15]}} + SVE_ACLE_FUNC(svst1_ver_za128,,,)(16, -1, 0, pg, ptr); + // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 15]}} + SVE_ACLE_FUNC(svld1_hor_za8,,,)(0, -1, -1, pg, ptr); + // expected-error@+1 {{argument value 16 is outside the valid range [0, 15]}} + SVE_ACLE_FUNC(svst1_ver_za8,,,)(0, -1, 16, pg, ptr); + // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 15]}} + SVE_ACLE_FUNC(svld1_hor_vnum_za128,,,)(-1, -1, 0, pg, ptr, 1); + // expected-error@+1 {{argument value 16 is outside the valid range [0, 15]}} + SVE_ACLE_FUNC(svst1_ver_vnum_za128,,,)(16, -1, 0, pg, ptr, 1); + // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 15]}} + SVE_ACLE_FUNC(svld1_hor_vnum_za8,,,)(0, -1, -1, pg, ptr, 1); + // expected-error@+1 {{argument value 16 is outside the valid range [0, 15]}} + SVE_ACLE_FUNC(svst1_ver_vnum_za8,,,)(0, -1, 16, pg, ptr, 1); +} + +ARM_STREAMING_ATTR +void test_constant(uint64_t u64, svbool_t pg, void *ptr) { + SVE_ACLE_FUNC(svld1_hor_za8,,,)(u64, u64, 0, pg, ptr); // expected-error {{argument to 'svld1_hor_za8' must be a constant integer}} + SVE_ACLE_FUNC(svld1_ver_za16,,,)(0, u64, u64, pg, ptr); // expected-error {{argument to 'svld1_ver_za16' must be a constant integer}} + SVE_ACLE_FUNC(svst1_hor_za32,,,)(u64, u64, 0, pg, ptr); // expected-error {{argument to 'svst1_hor_za32' must be a constant integer}} + SVE_ACLE_FUNC(svst1_ver_za64,,,)(0, u64, u64, pg, ptr); // expected-error {{argument to 'svst1_ver_za64' must be a constant integer}} + SVE_ACLE_FUNC(svld1_hor_vnum_za8,,,)(u64, u64, 0, pg, ptr, u64); // expected-error {{argument to 'svld1_hor_vnum_za8' must be a constant integer}} + SVE_ACLE_FUNC(svld1_ver_vnum_za16,,,)(0, u64, u64, pg, ptr, u64); // expected-error {{argument to 'svld1_ver_vnum_za16' must be a constant integer}} + SVE_ACLE_FUNC(svst1_hor_vnum_za32,,,)(u64, u64, 0, pg, ptr, u64); // expected-error {{argument to 'svst1_hor_vnum_za32' must be a constant integer}} + SVE_ACLE_FUNC(svst1_ver_vnum_za64,,,)(0, u64, u64, pg, ptr, u64); // expected-error {{argument to 'svst1_ver_vnum_za64' must be a constant integer}} +} diff --git a/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c new file mode 100644 --- /dev/null +++ b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c @@ -0,0 +1,25 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fsyntax-only -verify -emit-llvm -o - %s +// REQUIRES: aarch64-registered-target + +// Test that functions with the correct target attributes can use the correct SME intrinsics. + +#include + +__attribute__((target("sme"))) +void test_sme(svbool_t pg, void *ptr) { + svld1_hor_za8(0, 0, 0, pg, ptr); +} + +__attribute__((target("arch=armv8-a+sme"))) +void test_arch_sme(svbool_t pg, void *ptr) { + svld1_hor_vnum_za32(0, 0, 0, pg, ptr, 0); +} + +__attribute__((target("+sme"))) +void test_plus_sme(svbool_t pg, void *ptr) { + svst1_ver_za16(0, 0, 0, pg, ptr); +} + +void undefined(svbool_t pg, void *ptr) { + svst1_ver_vnum_za64(0, 0, 0, pg, ptr, 0); // expected-error {{'svst1_ver_vnum_za64' needs target feature sme}} +} diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp --- a/clang/utils/TableGen/SveEmitter.cpp +++ b/clang/utils/TableGen/SveEmitter.cpp @@ -228,7 +228,7 @@ } /// Emits the intrinsic declaration to the ostream. - void emitIntrinsic(raw_ostream &OS) const; + void emitIntrinsic(raw_ostream &OS, SVEEmitter &Emitter) const; private: std::string getMergeSuffix() const { return MergeSuffix; } @@ -347,8 +347,21 @@ /// Create the SVETypeFlags used in CGBuiltins void createTypeFlags(raw_ostream &o); + /// Emit arm_sme.h. + void createSMEHeader(raw_ostream &o); + + /// Emit all the SME __builtin prototypes and code needed by Sema. + void createSMEBuiltins(raw_ostream &o); + + /// Emit all the information needed to map builtin -> LLVM IR intrinsic. + void createSMECodeGenMap(raw_ostream &o); + + /// Emit all the range checks for the immediates. + void createSMERangeChecks(raw_ostream &o); + /// Create intrinsic and add it to \p Out - void createIntrinsic(Record *R, SmallVectorImpl> &Out); + void createIntrinsic(Record *R, + SmallVectorImpl> &Out); }; } // end anonymous namespace @@ -481,6 +494,9 @@ case 'l': ElementBitwidth = 64; break; + case 'q': + ElementBitwidth = 128; + break; case 'h': Float = true; ElementBitwidth = 16; @@ -758,6 +774,11 @@ NumVectors = 0; Signed = true; break; + case '%': + Pointer = true; + Void = true; + NumVectors = 0; + break; case 'A': Pointer = true; ElementBitwidth = Bitwidth = 8; @@ -919,15 +940,29 @@ getMergeSuffix(); } -void Intrinsic::emitIntrinsic(raw_ostream &OS) const { +void Intrinsic::emitIntrinsic(raw_ostream &OS, SVEEmitter &Emitter) const { bool IsOverloaded = getClassKind() == ClassG && getProto().size() > 1; std::string FullName = mangleName(ClassS); std::string ProtoName = mangleName(getClassKind()); + std::string SMEAttrs = ""; + + if (Flags & Emitter.getEnumValueForFlag("IsStreaming")) + SMEAttrs += ", arm_streaming"; + if (Flags & Emitter.getEnumValueForFlag("IsStreamingCompatible")) + SMEAttrs += ", arm_streaming_compatible"; + if (Flags & Emitter.getEnumValueForFlag("IsSharedZA")) + SMEAttrs += ", arm_shared_za"; + if (Flags & Emitter.getEnumValueForFlag("IsPreservesZA")) + SMEAttrs += ", arm_preserves_za"; OS << (IsOverloaded ? "__aio " : "__ai ") << "__attribute__((__clang_arm_builtin_alias(" - << "__builtin_sve_" << FullName << ")))\n"; + << (SMEAttrs.empty() ? "__builtin_sve_" : "__builtin_sme_") + << FullName << ")"; + if (!SMEAttrs.empty()) + OS << SMEAttrs; + OS << "))\n"; OS << getTypes()[0].str() << " " << ProtoName << "("; for (unsigned I = 0; I < getTypes().size() - 1; ++I) { @@ -984,6 +1019,8 @@ return encodeEltType("EltTyInt32"); case 64: return encodeEltType("EltTyInt64"); + case 128: + return encodeEltType("EltTyInt128"); default: llvm_unreachable("Unhandled integer element bitwidth!"); } @@ -1228,7 +1265,7 @@ // Actually emit the intrinsic declarations. for (auto &I : Defs) - I->emitIntrinsic(OS); + I->emitIntrinsic(OS, *this); OS << "#define svcvtnt_bf16_x svcvtnt_bf16_m\n"; OS << "#define svcvtnt_bf16_f32_x svcvtnt_bf16_f32_m\n"; @@ -1377,6 +1414,165 @@ OS << "#endif\n\n"; } +void SVEEmitter::createSMEHeader(raw_ostream &OS) { + OS << "/*===---- arm_sme_draft_spec_subject_to_change.h - ARM SME intrinsics " + "------===\n" + " *\n" + " *\n" + " * Part of the LLVM Project, under the Apache License v2.0 with LLVM " + "Exceptions.\n" + " * See https://llvm.org/LICENSE.txt for license information.\n" + " * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n" + " *\n" + " *===-----------------------------------------------------------------" + "------===\n" + " */\n\n"; + + OS << "#ifndef __ARM_SME_H\n"; + OS << "#define __ARM_SME_H\n\n"; + + OS << "#if !defined(__LITTLE_ENDIAN__)\n"; + OS << "#error \"Big endian is currently not supported for arm_sme_draft_spec_subject_to_change.h\"\n"; + OS << "#endif\n"; + + OS << "#include \n\n"; + + OS << "/* Function attributes */\n"; + OS << "#define __ai static __inline__ __attribute__((__always_inline__, " + "__nodebug__))\n\n"; + + OS << "#ifdef __cplusplus\n"; + OS << "extern \"C\" {\n"; + OS << "#endif\n\n"; + + SmallVector, 128> Defs; + std::vector RV = Records.getAllDerivedDefinitions("Inst"); + for (auto *R : RV) + createIntrinsic(R, Defs); + + // Sort intrinsics in header file by following order/priority similar to SVE: + // - Architectural guard + // - Class (is intrinsic overloaded or not) + // - Intrinsic name + std::stable_sort(Defs.begin(), Defs.end(), + [](const std::unique_ptr &A, + const std::unique_ptr &B) { + auto ToTuple = [](const std::unique_ptr &I) { + return std::make_tuple(I->getGuard(), + (unsigned)I->getClassKind(), + I->getName()); + }; + return ToTuple(A) < ToTuple(B); + }); + + // Actually emit the intrinsic declaration. + for (auto &I : Defs) { + I->emitIntrinsic(OS, *this); + } + + OS << "#ifdef __cplusplus\n"; + OS << "} // extern \"C\"\n"; + OS << "#endif\n\n"; + OS << "#undef __ai\n\n"; + OS << "#endif /* __ARM_SME_H */\n"; +} + +void SVEEmitter::createSMEBuiltins(raw_ostream &OS) { + std::vector RV = Records.getAllDerivedDefinitions("Inst"); + SmallVector, 128> Defs; + for (auto *R : RV) { + createIntrinsic(R, Defs); + } + + // The mappings must be sorted based on BuiltinID. + llvm::sort(Defs, [](const std::unique_ptr &A, + const std::unique_ptr &B) { + return A->getMangledName() < B->getMangledName(); + }); + + OS << "#ifdef GET_SME_BUILTINS\n"; + for (auto &Def : Defs) { + // Only create BUILTINs for non-overloaded intrinsics, as overloaded + // declarations only live in the header file. + if (Def->getClassKind() != ClassG) + OS << "TARGET_BUILTIN(__builtin_sme_" << Def->getMangledName() << ", \"" + << Def->getBuiltinTypeStr() << "\", \"n\", \"" << Def->getGuard() + << "\")\n"; + } + + OS << "#endif\n\n"; +} + +void SVEEmitter::createSMECodeGenMap(raw_ostream &OS) { + std::vector RV = Records.getAllDerivedDefinitions("Inst"); + SmallVector, 128> Defs; + for (auto *R : RV) { + createIntrinsic(R, Defs); + } + + // The mappings must be sorted based on BuiltinID. + llvm::sort(Defs, [](const std::unique_ptr &A, + const std::unique_ptr &B) { + return A->getMangledName() < B->getMangledName(); + }); + + OS << "#ifdef GET_SME_LLVM_INTRINSIC_MAP\n"; + for (auto &Def : Defs) { + // Builtins only exist for non-overloaded intrinsics, overloaded + // declarations only live in the header file. + if (Def->getClassKind() == ClassG) + continue; + + uint64_t Flags = Def->getFlags(); + auto FlagString = std::to_string(Flags); + + std::string LLVMName = Def->getLLVMName(); + std::string Builtin = Def->getMangledName(); + if (!LLVMName.empty()) + OS << "SMEMAP1(" << Builtin << ", " << LLVMName << ", " << FlagString + << "),\n"; + else + OS << "SMEMAP2(" << Builtin << ", " << FlagString << "),\n"; + } + OS << "#endif\n\n"; +} + +void SVEEmitter::createSMERangeChecks(raw_ostream &OS) { + std::vector RV = Records.getAllDerivedDefinitions("Inst"); + SmallVector, 128> Defs; + for (auto *R : RV) { + createIntrinsic(R, Defs); + } + + // The mappings must be sorted based on BuiltinID. + llvm::sort(Defs, [](const std::unique_ptr &A, + const std::unique_ptr &B) { + return A->getMangledName() < B->getMangledName(); + }); + + + OS << "#ifdef GET_SME_IMMEDIATE_CHECK\n"; + + // Ensure these are only emitted once. + std::set Emitted; + + for (auto &Def : Defs) { + if (Emitted.find(Def->getMangledName()) != Emitted.end() || + Def->getImmChecks().empty()) + continue; + + OS << "case SME::BI__builtin_sme_" << Def->getMangledName() << ":\n"; + for (auto &Check : Def->getImmChecks()) + OS << "ImmChecks.push_back(std::make_tuple(" << Check.getArg() << ", " + << Check.getKind() << ", " << Check.getElementSizeInBits() << "));\n"; + OS << " break;\n"; + + Emitted.insert(Def->getMangledName()); + } + + OS << "#endif\n\n"; +} + namespace clang { void EmitSveHeader(RecordKeeper &Records, raw_ostream &OS) { SVEEmitter(Records).createHeader(OS); @@ -1398,4 +1594,19 @@ SVEEmitter(Records).createTypeFlags(OS); } +void EmitSmeHeader(RecordKeeper &Records, raw_ostream &OS) { + SVEEmitter(Records).createSMEHeader(OS); +} + +void EmitSmeBuiltins(RecordKeeper &Records, raw_ostream &OS) { + SVEEmitter(Records).createSMEBuiltins(OS); +} + +void EmitSmeBuiltinCG(RecordKeeper &Records, raw_ostream &OS) { + SVEEmitter(Records).createSMECodeGenMap(OS); +} + +void EmitSmeRangeChecks(RecordKeeper &Records, raw_ostream &OS) { + SVEEmitter(Records).createSMERangeChecks(OS); +} } // End namespace clang diff --git a/clang/utils/TableGen/TableGen.cpp b/clang/utils/TableGen/TableGen.cpp --- a/clang/utils/TableGen/TableGen.cpp +++ b/clang/utils/TableGen/TableGen.cpp @@ -82,6 +82,10 @@ GenArmSveBuiltinCG, GenArmSveTypeFlags, GenArmSveRangeChecks, + GenArmSmeHeader, + GenArmSmeBuiltins, + GenArmSmeBuiltinCG, + GenArmSmeRangeChecks, GenArmCdeHeader, GenArmCdeBuiltinDef, GenArmCdeBuiltinSema, @@ -226,6 +230,14 @@ "Generate arm_sve_typeflags.inc for clang"), clEnumValN(GenArmSveRangeChecks, "gen-arm-sve-sema-rangechecks", "Generate arm_sve_sema_rangechecks.inc for clang"), + clEnumValN(GenArmSmeHeader, "gen-arm-sme-header", + "Generate arm_sme.h for clang"), + clEnumValN(GenArmSmeBuiltins, "gen-arm-sme-builtins", + "Generate arm_sme_builtins.inc for clang"), + clEnumValN(GenArmSmeBuiltinCG, "gen-arm-sme-builtin-codegen", + "Generate arm_sme_builtin_cg_map.inc for clang"), + clEnumValN(GenArmSmeRangeChecks, "gen-arm-sme-sema-rangechecks", + "Generate arm_sme_sema_rangechecks.inc for clang"), clEnumValN(GenArmMveHeader, "gen-arm-mve-header", "Generate arm_mve.h for clang"), clEnumValN(GenArmMveBuiltinDef, "gen-arm-mve-builtin-def", @@ -454,6 +466,18 @@ case GenArmSveRangeChecks: EmitSveRangeChecks(Records, OS); break; + case GenArmSmeHeader: + EmitSmeHeader(Records, OS); + break; + case GenArmSmeBuiltins: + EmitSmeBuiltins(Records, OS); + break; + case GenArmSmeBuiltinCG: + EmitSmeBuiltinCG(Records, OS); + break; + case GenArmSmeRangeChecks: + EmitSmeRangeChecks(Records, OS); + break; case GenArmCdeHeader: EmitCdeHeader(Records, OS); break; diff --git a/clang/utils/TableGen/TableGenBackends.h b/clang/utils/TableGen/TableGenBackends.h --- a/clang/utils/TableGen/TableGenBackends.h +++ b/clang/utils/TableGen/TableGenBackends.h @@ -101,6 +101,11 @@ void EmitSveTypeFlags(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitSveRangeChecks(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitSmeHeader(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitSmeBuiltins(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitSmeBuiltinCG(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitSmeRangeChecks(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); + void EmitMveHeader(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitMveBuiltinDef(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitMveBuiltinSema(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);