Index: docs/aarch64-omp-simd-support.md =================================================================== --- /dev/null +++ docs/aarch64-omp-simd-support.md @@ -0,0 +1,109 @@ +# OpenMP SIMD support in clang + +## Interfacing vector libraries with serial code + +`clang` can interface with user-provided vector libraries to vectorize +loops that invoke serial functions. + +The vector functions available in the library are exposed to the +compiler by using the `pragma omp declare simd` directive on the +function declarations of the public header file of the library. + +The following example shows the basic functionality: + +``` +// code in the library public header file "myvecroutines.h" +#pragma omp declare simd +int foo(double); + +// loop in the user code, in user_code.c +#include "path/to/myvecroutines.h" +void do_something(int * a, double * b, unsigned N) { + for (unsigned i = 0; i < N; ++i) { + a[i] = foo(b[i]); + } +} + +``` + +Compile the code by invoking clang with either the `-fopenmp` or the +`-fopenmp-simd` flags: + +``` +$> clang -target aarch64-linux-gnu -march=armv8a+simd -fopenmp \ + > -O3 -c user_code.c -o objfile.o +$> clang -target aarch64-linux-gnu -march=armv8a+sve -fopenmp \ + > -O3 -c user_code.c -o objfile.o +``` + +The object file that is produced links against `libmyvecroutines` and +executes using the vector version of `foo`. + +The library must expose the vector name that is associated to the +scalar function according to the *name mangling rules* section. + +## Level of support + +The current level of support covers the following features: + +* OpenMP 4.0 and 4.5 `declare simd`, with the exclusion of the + `uniform` and `aligned` clause, as the LLVM vectorizer is + not yet able to take advantage of them. The `linear` clause is + supported only for pointers, with a linear step of 1. +* The clause `simdlen` is supported, with the following restrictions: + 1. It must be a power of 2 when targeting Advanced SIMD + vectorization. + 2. 
It must adhere to the architectural limits of the SVE registers + when targeting SVE. + In particular, when `simdlen` is specified, the vector register + size constraints of SVE are verified using the widest data type + of the scalar function signature, for example `double` in `float + foo(double)`. +* When no `simdlen` is specified, the number of lanes VLEN of a function is: + 1. When targeting Advanced SIMD, computed by fitting the narrowest + type of the scalar function signature in a + quadword (128-bit) vector register. + 2. When targeting SVE, unspecified (set to `x` in the mangled + name). The narrow types are unpacked in the vector parameters, + the widest are packed. +* The supported function signatures in C and C++ are of the forms: + 1. `void (Ty1, Ty2...)` + 2. `Ty1 (Ty2, Ty3...)` + where each `TyN` is any of the 8, 16, 32 or 64-bit + integral types or single and double precision floating-point types, + or pointers to them. Variadic functions and C++ methods are not supported. + +## Name mangling rules (draft) + +The vector name that is associated to the scalar name is generated +according to the grammar that follows, plus these rules: + +1. VLEN is the number of lanes that are processed by the function. It + is set to `x` when targeting SVE and no `simdlen` clause is used. +2. When `<mask>` is `M`, an additional vector input parameter (the _mask_) is + added at the end of the vector signature: + * `svbool_t` for SVE + * `uintXxY_t` or `uintXxYxZ_t` for SIMD, with X and Y chosen to + match the bit width of the narrowest type in the scalar + function signature and the VLEN associated to the function, + respectively. The form with Z can be used if a structured vector is + needed to carry the user-specified VLEN. 
+ +``` + <vector name> := <prefix> "_" <name> + <name> := original C name + | C++ mangle of the original source name + <prefix> := "_ZGV" <isa> <mask> <len> <parameters> + <isa> := "n" (Advanced SIMD) + | "s" (SVE) + <mask> := "N" (No Mask) + | "M" (Mask) + <len> := VLEN computed or `simdlen` assigned + <parameters> := <parameter> { <parameter> } + <parameter> := <parameter kind> [ "a" [<number>] ] // "a" is for aligned(param:n) + <parameter kind> := "v" + | "l" // linear(param:step) + | "ls" // linear(param:step_var) uniform(step_var) + | "u" // uniform(param) + <number> := number +``` Index: include/clang-c/Index.h =================================================================== --- include/clang-c/Index.h +++ include/clang-c/Index.h @@ -3291,6 +3291,7 @@ CXCallingConv_Swift = 13, CXCallingConv_PreserveMost = 14, CXCallingConv_PreserveAll = 15, + CXCallingConv_AArch64VectorCall = 16, CXCallingConv_Invalid = 100, CXCallingConv_Unexposed = 200 Index: include/clang/AST/ASTContext.h =================================================================== --- include/clang/AST/ASTContext.h +++ include/clang/AST/ASTContext.h @@ -541,6 +541,8 @@ const TargetInfo *AuxTarget = nullptr; clang::PrintingPolicy PrintingPolicy; + ArrayRef Argv; + public: IdentifierTable &Idents; SelectorTable &Selectors; @@ -648,6 +650,14 @@ PrintingPolicy = Policy; } + void setCmdLineArgs(ArrayRef _Argv) { + Argv = _Argv; + } + + ArrayRef getCmdLineArgs() const { + return Argv; + } + SourceManager& getSourceManager() { return SourceMgr; } const SourceManager& getSourceManager() const { return SourceMgr; } @@ -1026,7 +1036,7 @@ CanQualType SatUnsignedShortFractTy, SatUnsignedFractTy, SatUnsignedLongFractTy; CanQualType HalfTy; // [OpenCL 6.1.1.1], ARM NEON - CanQualType Float16Ty; // C11 extension ISO/IEC TS 18661-3 + CanQualType Float16Ty, Float16ComplexTy; // C11 extension ISO/IEC TS 18661-3 CanQualType FloatComplexTy, DoubleComplexTy, LongDoubleComplexTy; CanQualType Float128ComplexTy; CanQualType VoidPtrTy, NullPtrTy; @@ -1050,6 +1060,8 @@ // The decl is built when constructing 'BuiltinVaListDecl'. 
mutable Decl *VaListTagDecl; + mutable SmallVector CanQualTypes; + ASTContext(LangOptions &LOpts, SourceManager &SM, IdentifierTable &idents, SelectorTable &sels, Builtin::Context &builtins); ASTContext(const ASTContext &) = delete; @@ -1328,6 +1340,9 @@ /// \pre \p VectorType must be a built-in type. QualType getVectorType(QualType VectorType, unsigned NumElts, VectorType::VectorKind VecKind) const; + + QualType getScalableVectorType(QualType ElementType) const; + /// Return the unique reference to the type for a dependently sized vector of /// the specified element type. QualType getDependentVectorType(QualType VectorType, Expr *SizeExpr, @@ -2817,6 +2832,7 @@ private: void InitBuiltinType(CanQualType &R, BuiltinType::Kind K); + void InitTargetBuiltinType(BuiltinType::Kind K, int TD); // Return the Objective-C type encoding for a given type. void getObjCEncodingForTypeImpl(QualType t, std::string &S, Index: include/clang/AST/BuiltinTypesSVE.def =================================================================== --- /dev/null +++ include/clang/AST/BuiltinTypesSVE.def @@ -0,0 +1,57 @@ +//===-- BuiltinTypesSVE.def - Metadata about BuiltinTypes -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the database about various builtin singleton types. +// +// BuiltinType::Id is the enumerator defining the type. +// +// Context.SingletonId is the global singleton of this type. Some global +// singletons are shared by multiple types. +// +// BUILTIN_TYPE(Id, SingletonId) - A builtin type that has not been +// covered by any other #define. Defining this macro covers all +// the builtins. +// +// SVE_VECTOR_TYPE(Id, SingletonId) - A scalable vector. +// +// SVE_PREDICATE_TYPE(Id, SingletonId) - A scalable predicate. 
+// +//===----------------------------------------------------------------------===// + +#ifndef SVE_VECTOR_TYPE +#define SVE_VECTOR_TYPE(Name, Id, SingletonId, ElKind, ElBits, IsSigned, IsFP)\ + BUILTIN_TYPE(Name, Id, SingletonId) +#endif + +#ifndef SVE_PREDICATE_TYPE +#define SVE_PREDICATE_TYPE(Name, Id, SingletonId, ElKind)\ + BUILTIN_TYPE(Name, Id, SingletonId) +#endif + +//===- Vector point types -----------------------------------------------===// + +SVE_VECTOR_TYPE("__SVInt8_t", SveInt8, SveInt8Ty, SveElSInt8, 8, true, false) +SVE_VECTOR_TYPE("__SVInt16_t", SveInt16, SveInt16Ty, SveElSInt16, 16, true, false) +SVE_VECTOR_TYPE("__SVInt32_t", SveInt32, SveInt32Ty, SveElSInt32, 32, true, false) +SVE_VECTOR_TYPE("__SVInt64_t", SveInt64, SveInt64Ty, SveElSInt64, 64, true, false) + +SVE_VECTOR_TYPE("__SVUint8_t", SveUint8, SveUint8Ty, SveElUInt8, 8, false, false) +SVE_VECTOR_TYPE("__SVUint16_t", SveUint16, SveUint16Ty, SveElUInt16, 16, false, false) +SVE_VECTOR_TYPE("__SVUint32_t", SveUint32, SveUint32Ty, SveElUInt32, 32, false, false) +SVE_VECTOR_TYPE("__SVUint64_t", SveUint64, SveUint64Ty, SveElUInt64, 64, false, false) + +SVE_VECTOR_TYPE("__SVFloat16_t", SveFloat16, SveFloat16Ty, SveElHalf, 16, true, true) +SVE_VECTOR_TYPE("__SVFloat32_t", SveFloat32, SveFloat32Ty, SveElFloat, 32, true, true) +SVE_VECTOR_TYPE("__SVFloat64_t", SveFloat64, SveFloat64Ty, SveElDouble, 64, true, true) + +SVE_PREDICATE_TYPE("__SVBool_t", SveBool, SveBoolTy, SveElBool) + +#undef SVE_VECTOR_TYPE +#undef SVE_PREDICATE_TYPE +#undef BUILTIN_TYPE Index: include/clang/AST/CanonicalType.h =================================================================== --- include/clang/AST/CanonicalType.h +++ include/clang/AST/CanonicalType.h @@ -265,6 +265,9 @@ // Type predicates LLVM_CLANG_CANPROXY_SIMPLE_ACCESSOR(bool, isObjectType) LLVM_CLANG_CANPROXY_SIMPLE_ACCESSOR(bool, isIncompleteType) + LLVM_CLANG_CANPROXY_SIMPLE_ACCESSOR(bool, isIndefiniteType) + LLVM_CLANG_CANPROXY_SIMPLE_ACCESSOR(bool, 
isSizelessType) + LLVM_CLANG_CANPROXY_SIMPLE_ACCESSOR(bool, isSizelessBuiltinType) LLVM_CLANG_CANPROXY_SIMPLE_ACCESSOR(bool, isIncompleteOrObjectType) LLVM_CLANG_CANPROXY_SIMPLE_ACCESSOR(bool, isVariablyModifiedType) LLVM_CLANG_CANPROXY_SIMPLE_ACCESSOR(bool, isIntegerType) Index: include/clang/AST/Decl.h =================================================================== --- include/clang/AST/Decl.h +++ include/clang/AST/Decl.h @@ -3241,6 +3241,7 @@ void setTagKind(TagKind TK) { TagDeclKind = TK; } bool isStruct() const { return getTagKind() == TTK_Struct; } + bool isSizelessStruct() const { return getTagKind() == TTK_SizelessStruct; } bool isInterface() const { return getTagKind() == TTK_Interface; } bool isClass() const { return getTagKind() == TTK_Class; } bool isUnion() const { return getTagKind() == TTK_Union; } Index: include/clang/AST/Expr.h =================================================================== --- include/clang/AST/Expr.h +++ include/clang/AST/Expr.h @@ -273,7 +273,7 @@ MLV_DuplicateVectorComponents, MLV_InvalidExpression, MLV_LValueCast, // Specialized form of MLV_InvalidExpression. - MLV_IncompleteType, + MLV_IndefiniteType, MLV_ConstQualified, MLV_ConstQualifiedField, MLV_ConstAddrSpace, @@ -328,7 +328,7 @@ CM_ConstQualifiedField, CM_ConstAddrSpace, CM_ArrayType, - CM_IncompleteType + CM_IndefiniteType }; private: Index: include/clang/AST/TargetTypes.h =================================================================== --- /dev/null +++ include/clang/AST/TargetTypes.h @@ -0,0 +1,35 @@ +//===--- TargetTypes.h - Target specific builtin IDs ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Enumerates target-specific builtin types in their own namespaces +/// within namespace ::clang. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_TARGETTYPES_H +#define LLVM_CLANG_AST_TARGETTYPES_H + +#include +#include "clang/AST/Type.h" + +namespace clang { + + namespace SVE { + enum { + LastTIBuiltinType = clang::BuiltinType::LastTIBuiltinType, +#define BUILTIN_TYPE(Name, Id, SingletonId) Id, +#include "clang/AST/BuiltinTypesSVE.def" + LastTSBuiltinType + }; + } + +} // end namespace clang. + +#endif Index: include/clang/AST/Type.h =================================================================== --- include/clang/AST/Type.h +++ include/clang/AST/Type.h @@ -38,6 +38,7 @@ #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Support/Casting.h" @@ -1513,7 +1514,14 @@ /// The kind (BuiltinType::Kind) of builtin type this is. unsigned Kind : 8; + + /// What target is this for? + unsigned Target : 6; + + /// Target specific detail, such as features, etc. + unsigned TargetDetail : 2; }; + static_assert(llvm::Triple::LastArchType <= 63, "Does not fit in 6 bits"); class FunctionTypeBitfields { friend class FunctionProtoType; @@ -1723,18 +1731,43 @@ /// or QualType::getSingleStepDesugaredType(const ASTContext&). QualType getLocallyUnqualifiedSingleStepDesugaredType() const; - /// Types are partitioned into 3 broad categories (C99 6.2.5p1): - /// object types, function types, and incomplete types. + /// As an extension, we classify types as one of "indefinite" or "definite"; + /// every type is one or the other. Indefinite types are types that can + /// describe objects but don't have enough information to construct them. + /// A type is indefinite iff: + /// - it is "incomplete" according to the standard definition; or + /// - it is a sizeless struct whose body has not yet been defined. 
+ /// + /// \param Def If non-null, and the type refers to some kind of declaration + /// that can be made definite (such as a C struct, C++ class, or Objective-C + /// class), will be set to the declaration. + bool isIndefiniteType(NamedDecl **Def = nullptr) const; + + /// As an extension, we classify types as one of "sized" or "sizeless"; + /// every type is one or the other. Standard types are all sized; + /// sizeless types are purely an extension. + /// + /// Sizeless types contain data with no specified size, alignment, + /// or layout. They are always incomplete. + bool isSizelessType() const; + bool isSizelessBuiltinType() const; /// Return true if this is an incomplete type. /// A type that can describe objects, but which lacks information needed to /// determine its size (e.g. void, or a fwd declared struct). Clients of this /// routine will need to determine if the size is actually required. /// - /// Def If non-null, and the type refers to some kind of declaration - /// that can be completed (such as a C struct, C++ class, or Objective-C - /// class), will be set to the declaration. - bool isIncompleteType(NamedDecl **Def = nullptr) const; + /// A type is incomplete according to our definition iff: + /// - it is "incomplete" according to the standard definition; or + /// - it is sizeless (regardless of whether it's indefinite or definite) + /// + /// The intention is that the usual rules for incomplete types will + /// by default apply to sizeless types as well. Specifically-chosen + /// rules can then be redefined in terms of indefinite and definite if + /// definite sizeless types are acceptable. + bool isIncompleteType() const { + return isIndefiniteType() || isSizelessType(); + } /// Return true if this is an incomplete or object /// type, in other words, not a function type. 
@@ -2255,23 +2288,30 @@ #include "clang/Basic/OpenCLImageTypes.def" // All other builtin types #define BUILTIN_TYPE(Id, SingletonId) Id, -#define LAST_BUILTIN_TYPE(Id) LastKind = Id +#define LAST_BUILTIN_TYPE(Id) LastTIBuiltinType = Id, #include "clang/AST/BuiltinTypes.def" }; private: friend class ASTContext; // ASTContext creates these. - BuiltinType(Kind K) - : Type(Builtin, QualType(), /*Dependent=*/(K == Dependent), - /*InstantiationDependent=*/(K == Dependent), - /*VariablyModified=*/false, - /*Unexpanded parameter pack=*/false) { + BuiltinType(Kind K, llvm::Triple::ArchType T, int D) + : Type(Builtin, QualType(), /*Dependent=*/(K == Dependent), + /*InstantiationDependent=*/(K == Dependent), + /*VariablyModified=*/false, + /*Unexpanded parameter pack=*/false) { BuiltinTypeBits.Kind = K; + BuiltinTypeBits.Target = T; + BuiltinTypeBits.TargetDetail = D; } public: Kind getKind() const { return static_cast(BuiltinTypeBits.Kind); } + bool isTargetKind() const { return BuiltinTypeBits.Kind > LastTIBuiltinType; } + llvm::Triple::ArchType getTarget() const { + return static_cast(BuiltinTypeBits.Target); + } + int getTargetDetail() const { return BuiltinTypeBits.TargetDetail; } StringRef getName(const PrintingPolicy &Policy) const; const char *getNameAsCString(const PrintingPolicy &Policy) const { @@ -2309,6 +2349,11 @@ /// which cannot appear in arbitrary positions in a fully-formed /// expression. bool isPlaceholderType() const { + // Sizeless builtin types can appear in arbitrary positions in a + // fully-formed expression. + if (isSizelessBuiltinType()) + return false; + return isPlaceholderTypeKind(getKind()); } @@ -4222,6 +4267,7 @@ attr_pascal, attr_swiftcall, attr_vectorcall, + attr_aarch64_vector_pcs, attr_inteloclbicc, attr_ms_abi, attr_sysv_abi, @@ -4856,9 +4902,10 @@ /// The "class" keyword. TTK_Class, - - /// The "enum" keyword. - TTK_Enum + /// \brief The "enum" keyword. + TTK_Enum, + /// \brief The "__sizeless_struct" keyword. 
+ TTK_SizelessStruct }; /// The elaboration keyword that precedes a qualified type name or @@ -4866,8 +4913,10 @@ enum ElaboratedTypeKeyword { /// The "struct" keyword introduces the elaborated-type-specifier. ETK_Struct, - - /// The "__interface" keyword introduces the elaborated-type-specifier. + /// \brief The "__sizeless_struct" keyword introduces the + /// elaborated-type-specifier. + ETK_SizelessStruct, + /// \brief The "__interface" keyword introduces the elaborated-type-specifier. ETK_Interface, /// The "union" keyword introduces the elaborated-type-specifier. Index: include/clang/Basic/Attr.td =================================================================== --- include/clang/Basic/Attr.td +++ include/clang/Basic/Attr.td @@ -1764,6 +1764,14 @@ let Documentation = [PcsDocs]; } +def AArch64VectorPcs: InheritableAttr { + let Spellings = [GNU<"aarch64_vector_pcs">, + CXX11<"clang", "aarch64_vector_pcs">, + Keyword<"__aarch64_vector_pcs">, + Keyword<"_aarch64_vector_pcs">]; + let Documentation = [AArch64VectorPcsDocs]; +} + def Pure : InheritableAttr { let Spellings = [GCC<"pure">]; let Documentation = [Undocumented]; @@ -2776,6 +2784,7 @@ def LoopHint : Attr { /// #pragma clang loop