diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h --- a/clang/include/clang/AST/Decl.h +++ b/clang/include/clang/AST/Decl.h @@ -1776,7 +1776,8 @@ None, Target, CPUSpecific, - CPUDispatch + CPUDispatch, + OMPVariant, }; /// Represents a function declaration or definition. @@ -2378,6 +2379,10 @@ /// the target functionality. bool isTargetMultiVersion() const; + /// True if this function is a multiversioned function as a part of + /// the OpenMP begin/end declare variant functionality. + bool isOpenMPMultiVersion() const; + void setPreviousDeclaration(FunctionDecl * PrevDecl); FunctionDecl *getCanonicalDecl() override; diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h --- a/clang/include/clang/AST/StmtOpenMP.h +++ b/clang/include/clang/AST/StmtOpenMP.h @@ -4594,18 +4594,6 @@ } }; -class OMPDeclareVariantAttr; - -/// Helper to determine the best of two potential context matches. Note that -/// nullptr are valid inputs but also valid outputs, e.g., if neither attribute -/// describes a matching context. -const OMPDeclareVariantAttr * -getBetterOpenMPContextMatch(ASTContext &C, const OMPDeclareVariantAttr *LHSAttr, - const OMPDeclareVariantAttr *RHSAttr); - -/// Return true if the context described by \p A matches. -bool isOpenMPContextMatch(ASTContext &C, const OMPDeclareVariantAttr *A); - } // end namespace clang #endif diff --git a/clang/include/clang/Basic/OpenMPKinds.h b/clang/include/clang/Basic/OpenMPKinds.h --- a/clang/include/clang/Basic/OpenMPKinds.h +++ b/clang/include/clang/Basic/OpenMPKinds.h @@ -313,7 +313,7 @@ void getOpenMPCaptureRegions( llvm::SmallVectorImpl &CaptureRegions, OpenMPDirectiveKind DKind); -} +} // namespace clang #endif diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -2860,6 +2860,10 @@ parseOpenMPContextSelectors(SourceLocation Loc, SmallVectorImpl &Data); + /// Parse match clause of '#pragma omp [begin] declare variant'. + void ParseOMPDeclareVariantMatchClause( + SourceLocation Loc, SmallVectorImpl &Data); + /// Parse clauses for '#pragma omp declare variant'. void ParseOMPDeclareVariantClauses(DeclGroupPtrTy Ptr, CachedTokens &Toks, SourceLocation Loc); diff --git a/clang/include/clang/Sema/Overload.h b/clang/include/clang/Sema/Overload.h --- a/clang/include/clang/Sema/Overload.h +++ b/clang/include/clang/Sema/Overload.h @@ -831,6 +831,9 @@ /// to be used while performing partial ordering of function templates. unsigned ExplicitCallArguments; + /// TODO + UnresolvedLookupExpr *ULE = nullptr; + union { DeductionFailureInfo DeductionFailure; @@ -1086,7 +1089,7 @@ /// Find the best viable function on this overload set, if it exists. OverloadingResult BestViableFunction(Sema &S, SourceLocation Loc, - OverloadCandidateSet::iterator& Best); + OverloadCandidateSet::iterator &Best); SmallVector CompleteCandidates( Sema &S, OverloadCandidateDisplayKind OCD, ArrayRef Args, diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -9345,7 +9345,22 @@ //===--------------------------------------------------------------------===// // OpenMP directives and clauses. // + /// Helper to determine the best of two potential context matches. Note that + /// nullptr are valid inputs but also valid outputs, e.g., if neither + /// attribute describes a matching context. + const OMPDeclareVariantAttr * + getBetterOpenMPContextMatch(const OMPDeclareVariantAttr *LHSAttr, + const OMPDeclareVariantAttr *RHSAttr, + FunctionDecl *LHSFD = nullptr, + FunctionDecl *RHSFD = nullptr); + + // TODO + bool isNonMatchingDueToVariantContext(FunctionDecl &FD); + private: + /// Copies declare variant attributes from the template TD to the function FD. + void inheritOpenMPVariantAttrs(FunctionDecl *FD, + const FunctionTemplateDecl &TD); void *VarDataSharingAttributesStack; /// Number of nested '#pragma omp declare target' directives. unsigned DeclareTargetNestingLevel = 0; @@ -9409,6 +9424,9 @@ using OMPCtxSelectorData = OpenMPCtxSelectorData, ExprResult>; + /// A declare variant attribute if we are inside a begin/end declare variant + OMPDeclareVariantAttr *DeclareVariantScopeAttr = nullptr; + /// Checks if the variant/multiversion functions are compatible. bool areMultiversionVariantFunctionsCompatible( const FunctionDecl *OldFD, const FunctionDecl *NewFD, @@ -9416,7 +9434,9 @@ const PartialDiagnosticAt &NoteCausedDiagIDAt, const PartialDiagnosticAt &NoSupportDiagIDAt, const PartialDiagnosticAt &DiffDiagIDAt, bool TemplatesSupported, - bool ConstexprSupported, bool CLinkageMayDiffer); + bool ConstexprSupported, bool CLinkageMayDiffer, + bool StorageClassMayDiffer, bool ConstexprSpecMayDiffer, + bool InlineSpecificationMayDiffer); /// Function tries to capture lambda's captured variables in the OpenMP region /// before the original lambda is captured. @@ -9891,7 +9911,7 @@ /// must be used instead of the original one, specified in \p DG. /// \param Data Set of context-specific data for the specified context /// selector. - void ActOnOpenMPDeclareVariantDirective(FunctionDecl *FD, Expr *VariantRef, + bool ActOnOpenMPDeclareVariantDirective(FunctionDecl *FD, Expr *VariantRef, SourceRange SR, ArrayRef Data); diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -3104,6 +3104,9 @@ return MultiVersionKind::CPUDispatch; if (hasAttr()) return MultiVersionKind::CPUSpecific; + if (hasAttr() && + !getAttr()->getVariantFuncRef()) + return MultiVersionKind::OMPVariant; return MultiVersionKind::None; } @@ -3119,6 +3122,11 @@ return isMultiVersion() && hasAttr(); } +bool FunctionDecl::isOpenMPMultiVersion() const { + return isMultiVersion() && hasAttr() && + !getAttr()->getVariantFuncRef(); +} + void FunctionDecl::setPreviousDeclaration(FunctionDecl *PrevDecl) { redeclarable_base::setPreviousDecl(PrevDecl); diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp --- a/clang/lib/AST/StmtOpenMP.cpp +++ b/clang/lib/AST/StmtOpenMP.cpp @@ -13,8 +13,6 @@ #include "clang/AST/StmtOpenMP.h" #include "clang/AST/ASTContext.h" -#include "clang/AST/Attr.h" -#include "llvm/ADT/SetOperations.h" using namespace clang; using namespace llvm::omp; @@ -2242,264 +2240,3 @@ return new (Mem) OMPTargetTeamsDistributeSimdDirective(CollapsedNum, NumClauses); } - -// TODO: We have various representations for the same data, it might help to -// reuse some instead of converting them. -// TODO: It is unclear where this checking code should live. It is used all over -// the place and would probably fit bet in OMPDeclareVariantAttr. -using OMPContextSelectorData = - OpenMPCtxSelectorData, llvm::APSInt>; -using CompleteOMPContextSelectorData = SmallVector; - -/// Checks current context and returns true if it matches the context selector. -template -static bool checkContext(const OMPContextSelectorData &Data, - Arguments... Params) { - assert(Data.CtxSet != OMP_CTX_SET_unknown && Data.Ctx != OMP_CTX_unknown && - "Unknown context selector or context selector set."); - return false; -} - -/// Checks for implementation={vendor()} context selector. -/// \returns true iff ="llvm", false otherwise. -template <> -bool checkContext( - const OMPContextSelectorData &Data) { - return llvm::all_of(Data.Names, - [](StringRef S) { return !S.compare_lower("llvm"); }); -} - -/// Checks for device={kind()} context selector. -/// \returns true if ="host" and compilation is for host. -/// true if ="nohost" and compilation is for device. -/// true if ="cpu" and compilation is for Arm, X86 or PPC CPU. -/// true if ="gpu" and compilation is for NVPTX or AMDGCN. -/// false otherwise. -template <> -bool checkContext(const OMPContextSelectorData &Data, - const LangOptions &LO, - const TargetInfo &TI) { - for (StringRef Name : Data.Names) { - if (!Name.compare_lower("host")) { - if (LO.OpenMPIsDevice) - return false; - continue; - } - if (!Name.compare_lower("nohost")) { - if (!LO.OpenMPIsDevice) - return false; - continue; - } - switch (TI.getTriple().getArch()) { - case llvm::Triple::arm: - case llvm::Triple::armeb: - case llvm::Triple::aarch64: - case llvm::Triple::aarch64_be: - case llvm::Triple::aarch64_32: - case llvm::Triple::ppc: - case llvm::Triple::ppc64: - case llvm::Triple::ppc64le: - case llvm::Triple::x86: - case llvm::Triple::x86_64: - if (Name.compare_lower("cpu")) - return false; - break; - case llvm::Triple::amdgcn: - case llvm::Triple::nvptx: - case llvm::Triple::nvptx64: - if (Name.compare_lower("gpu")) - return false; - break; - case llvm::Triple::UnknownArch: - case llvm::Triple::arc: - case llvm::Triple::avr: - case llvm::Triple::bpfel: - case llvm::Triple::bpfeb: - case llvm::Triple::hexagon: - case llvm::Triple::mips: - case llvm::Triple::mipsel: - case llvm::Triple::mips64: - case llvm::Triple::mips64el: - case llvm::Triple::msp430: - case llvm::Triple::r600: - case llvm::Triple::riscv32: - case llvm::Triple::riscv64: - case llvm::Triple::sparc: - case llvm::Triple::sparcv9: - case llvm::Triple::sparcel: - case llvm::Triple::systemz: - case llvm::Triple::tce: - case llvm::Triple::tcele: - case llvm::Triple::thumb: - case llvm::Triple::thumbeb: - case llvm::Triple::xcore: - case llvm::Triple::le32: - case llvm::Triple::le64: - case llvm::Triple::amdil: - case llvm::Triple::amdil64: - case llvm::Triple::hsail: - case llvm::Triple::hsail64: - case llvm::Triple::spir: - case llvm::Triple::spir64: - case llvm::Triple::kalimba: - case llvm::Triple::shave: - case llvm::Triple::lanai: - case llvm::Triple::wasm32: - case llvm::Triple::wasm64: - case llvm::Triple::renderscript32: - case llvm::Triple::renderscript64: - return false; - } - } - return true; -} - -static CompleteOMPContextSelectorData -translateAttrToContextSelectorData(ASTContext &C, - const OMPDeclareVariantAttr *A) { - CompleteOMPContextSelectorData Data; - if (!A) - return Data; - for (unsigned I = 0, E = A->scores_size(); I < E; ++I) { - Data.emplace_back(); - auto CtxSet = static_cast( - *std::next(A->ctxSelectorSets_begin(), I)); - auto Ctx = static_cast( - *std::next(A->ctxSelectors_begin(), I)); - Data.back().CtxSet = CtxSet; - Data.back().Ctx = Ctx; - const Expr *Score = *std::next(A->scores_begin(), I); - Score->dump(); - Data.back().Score = Score->EvaluateKnownConstInt(C); - switch (Ctx) { - case OMP_CTX_vendor: - assert(CtxSet == OMP_CTX_SET_implementation && - "Expected implementation context selector set."); - Data.back().Names = - llvm::makeArrayRef(A->implVendors_begin(), A->implVendors_end()); - break; - case OMP_CTX_kind: - assert(CtxSet == OMP_CTX_SET_device && - "Expected device context selector set."); - Data.back().Names = - llvm::makeArrayRef(A->deviceKinds_begin(), A->deviceKinds_end()); - break; - case OMP_CTX_unknown: - llvm_unreachable("Unknown context selector kind."); - } - } - return Data; -} - -static bool -matchesOpenMPContextImpl(const CompleteOMPContextSelectorData &ContextData, - const LangOptions &LO, const TargetInfo &TI) { - for (const OMPContextSelectorData &Data : ContextData) { - switch (Data.Ctx) { - case OMP_CTX_vendor: - assert(Data.CtxSet == OMP_CTX_SET_implementation && - "Expected implementation context selector set."); - if (!checkContext(Data)) - return false; - break; - case OMP_CTX_kind: - assert(Data.CtxSet == OMP_CTX_SET_device && - "Expected device context selector set."); - if (!checkContext(Data, LO, TI)) - return false; - break; - case OMP_CTX_unknown: - llvm_unreachable("Unknown context selector kind."); - } - } - return true; -} - -static bool isStrictSubset(const CompleteOMPContextSelectorData &LHS, - const CompleteOMPContextSelectorData &RHS) { - llvm::SmallDenseMap, llvm::StringSet<>, 4> RHSData; - for (const OMPContextSelectorData &D : RHS) { - auto &Pair = RHSData.FindAndConstruct(std::make_pair(D.CtxSet, D.Ctx)); - Pair.getSecond().insert(D.Names.begin(), D.Names.end()); - } - bool AllSetsAreEqual = true; - for (const OMPContextSelectorData &D : LHS) { - auto It = RHSData.find(std::make_pair(D.CtxSet, D.Ctx)); - if (It == RHSData.end()) - return false; - if (D.Names.size() > It->getSecond().size()) - return false; - if (llvm::set_union(It->getSecond(), D.Names)) - return false; - AllSetsAreEqual = - AllSetsAreEqual && (D.Names.size() == It->getSecond().size()); - } - - return LHS.size() != RHS.size() || !AllSetsAreEqual; -} - -const OMPDeclareVariantAttr * -clang::getBetterOpenMPContextMatch(ASTContext &C, - const OMPDeclareVariantAttr *LHSAttr, - const OMPDeclareVariantAttr *RHSAttr) { - const CompleteOMPContextSelectorData LHS = - translateAttrToContextSelectorData(C, LHSAttr); - const CompleteOMPContextSelectorData RHS = - translateAttrToContextSelectorData(C, RHSAttr); - bool LHSMatch = LHSAttr && matchesOpenMPContextImpl(LHS, C.getLangOpts(), - C.getTargetInfo()); - bool RHSMatch = RHSAttr && matchesOpenMPContextImpl(RHS, C.getLangOpts(), - C.getTargetInfo()); - bool LHSisOK = LHSMatch && !LHSAttr->isInherited(); - bool RHSisOK = RHSMatch && !RHSAttr->isInherited(); - if (!LHSisOK && !RHSisOK) - return nullptr; - if (LHSisOK && !RHSisOK) - return LHSAttr; - if (!LHSisOK && RHSisOK) - return RHSAttr; - assert(LHSisOK && RHSisOK && "broken invariant"); - - // Score is calculated as sum of all scores + 1. - llvm::APSInt LHSScore(llvm::APInt(64, 1), /*isUnsigned=*/false); - bool RHSIsSubsetOfLHS = isStrictSubset(RHS, LHS); - if (RHSIsSubsetOfLHS) { - LHSScore = llvm::APSInt::get(0); - } else { - for (const OMPContextSelectorData &Data : LHS) { - if (Data.Score.getBitWidth() > LHSScore.getBitWidth()) { - LHSScore = LHSScore.extend(Data.Score.getBitWidth()) + Data.Score; - } else if (Data.Score.getBitWidth() < LHSScore.getBitWidth()) { - LHSScore += Data.Score.extend(LHSScore.getBitWidth()); - } else { - LHSScore += Data.Score; - } - } - } - llvm::APSInt RHSScore(llvm::APInt(64, 1), /*isUnsigned=*/false); - if (!RHSIsSubsetOfLHS && isStrictSubset(LHS, RHS)) { - RHSScore = llvm::APSInt::get(0); - } else { - for (const OMPContextSelectorData &Data : RHS) { - if (Data.Score.getBitWidth() > RHSScore.getBitWidth()) { - RHSScore = RHSScore.extend(Data.Score.getBitWidth()) + Data.Score; - } else if (Data.Score.getBitWidth() < RHSScore.getBitWidth()) { - RHSScore += Data.Score.extend(RHSScore.getBitWidth()); - } else { - RHSScore += Data.Score; - } - } - } - return llvm::APSInt::compareValues(LHSScore, RHSScore) >= 0 ? LHSAttr - : RHSAttr; -} - -bool clang::isOpenMPContextMatch(ASTContext &C, - const OMPDeclareVariantAttr *A) { - const CompleteOMPContextSelectorData Data = - translateAttrToContextSelectorData(C, A); - return matchesOpenMPContextImpl(Data, C.getLangOpts(), C.getTargetInfo()); -} diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -11,7 +11,11 @@ //===----------------------------------------------------------------------===// #include "clang/Basic/OpenMPKinds.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Attr.h" #include "clang/Basic/IdentifierTable.h" +#include "clang/Sema/Template.h" +#include "llvm/ADT/SetOperations.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/ErrorHandling.h" diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -982,6 +982,18 @@ } } +static void AppendOpenMPVariantMangling(const CodeGenModule &CGM, + const FunctionDecl *FD, + raw_ostream &Out) { + for (const OMPDeclareVariantAttr *Attr : + FD->specific_attrs()) { + if (Attr->isInherited()) + continue; + // TODO: Mangle the name based on the context + Out << ".ompvariant"; + } +} + static std::string getMangledNameImpl(const CodeGenModule &CGM, GlobalDecl GD, const NamedDecl *ND, bool OmitMultiVersionMangling = false) { @@ -1012,6 +1024,7 @@ if (const auto *FD = dyn_cast(ND)) if (FD->isMultiVersion() && !OmitMultiVersionMangling) { + FD->dump(); switch (FD->getMultiVersionKind()) { case MultiVersionKind::CPUDispatch: case MultiVersionKind::CPUSpecific: @@ -1022,6 +1035,9 @@ case MultiVersionKind::Target: AppendTargetMangling(CGM, FD->getAttr(), Out); break; + case MultiVersionKind::OMPVariant: + AppendOpenMPVariantMangling(CGM, FD, Out); + break; case MultiVersionKind::None: llvm_unreachable("None multiversion type isn't valid here"); } @@ -2851,6 +2867,10 @@ for (GlobalDecl GD : MultiVersionFuncs) { SmallVector Options; const FunctionDecl *FD = cast(GD.getDecl()); + // OpenMP multi versioning is (for now) resolved at compile time, no + // resolver function necessary (yet). + if (FD->isOpenMPMultiVersion()) + continue; getContext().forEachMultiversionedFunctionVersion( FD, [this, &GD, &Options](const FunctionDecl *CurFD) { GlobalDecl CurGD{ @@ -3098,7 +3118,7 @@ } } - if (FD->isMultiVersion()) { + if (FD->isMultiVersion() && !FD->isOpenMPMultiVersion()) { const auto *TA = FD->getAttr(); if (TA && TA->isDefaultVersion()) UpdateMultiVersionNames(GD, FD); diff --git a/clang/lib/Headers/__clang_cuda_cmath.h b/clang/lib/Headers/__clang_cuda_cmath.h --- a/clang/lib/Headers/__clang_cuda_cmath.h +++ b/clang/lib/Headers/__clang_cuda_cmath.h @@ -36,26 +36,10 @@ #define __DEVICE__ static __device__ __inline__ __attribute__((always_inline)) #endif -// For C++ 17 we need to include noexcept attribute to be compatible -// with the header-defined version. This may be removed once -// variant is supported. -#if defined(_OPENMP) && defined(__cplusplus) && __cplusplus >= 201703L -#define __NOEXCEPT noexcept -#else -#define __NOEXCEPT -#endif - -#if !(defined(_OPENMP) && defined(__cplusplus)) __DEVICE__ long long abs(long long __n) { return ::llabs(__n); } __DEVICE__ long abs(long __n) { return ::labs(__n); } __DEVICE__ float abs(float __x) { return ::fabsf(__x); } __DEVICE__ double abs(double __x) { return ::fabs(__x); } -#endif -// TODO: remove once variat is supported. -#if defined(_OPENMP) && defined(__cplusplus) -__DEVICE__ const float abs(const float __x) { return ::fabsf((float)__x); } -__DEVICE__ const double abs(const double __x) { return ::fabs((double)__x); } -#endif __DEVICE__ float acos(float __x) { return ::acosf(__x); } __DEVICE__ float asin(float __x) { return ::asinf(__x); } __DEVICE__ float atan(float __x) { return ::atanf(__x); } @@ -64,11 +48,9 @@ __DEVICE__ float cos(float __x) { return ::cosf(__x); } __DEVICE__ float cosh(float __x) { return ::coshf(__x); } __DEVICE__ float exp(float __x) { return ::expf(__x); } -__DEVICE__ float fabs(float __x) __NOEXCEPT { return ::fabsf(__x); } +__DEVICE__ float fabs(float __x) { return ::fabsf(__x); } __DEVICE__ float floor(float __x) { return ::floorf(__x); } __DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); } -// TODO: remove when variant is supported -#ifndef _OPENMP __DEVICE__ int fpclassify(float __x) { return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, __x); @@ -77,7 +59,6 @@ return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, __x); } -#endif __DEVICE__ float frexp(float __arg, int *__exp) { return ::frexpf(__arg, __exp); } @@ -457,10 +438,7 @@ using ::remquof; using ::rintf; using ::roundf; -// TODO: remove once variant is supported -#ifndef _OPENMP using ::scalblnf; -#endif using ::scalbnf; using ::sinf; using ::sinhf; diff --git a/clang/lib/Headers/__clang_cuda_device_functions.h b/clang/lib/Headers/__clang_cuda_device_functions.h --- a/clang/lib/Headers/__clang_cuda_device_functions.h +++ b/clang/lib/Headers/__clang_cuda_device_functions.h @@ -37,15 +37,6 @@ #define __FAST_OR_SLOW(fast, slow) slow #endif -// For C++ 17 we need to include noexcept attribute to be compatible -// with the header-defined version. This may be removed once -// variant is supported. -#if defined(_OPENMP) && defined(__cplusplus) && __cplusplus >= 201703L -#define __NOEXCEPT noexcept -#else -#define __NOEXCEPT -#endif - __DEVICE__ int __all(int __a) { return __nvvm_vote_all(__a); } __DEVICE__ int __any(int __a) { return __nvvm_vote_any(__a); } __DEVICE__ unsigned int __ballot(int __a) { return __nvvm_vote_ballot(__a); } @@ -57,7 +48,9 @@ __DEVICE__ void __brkpt() { asm volatile("brkpt;"); } __DEVICE__ void __brkpt(int __a) { __brkpt(); } #else -__DEVICE__ void __attribute__((overloadable)) __brkpt(void) { asm volatile("brkpt;"); } +__DEVICE__ void __attribute__((overloadable)) __brkpt(void) { + asm volatile("brkpt;"); +} __DEVICE__ void __attribute__((overloadable)) __brkpt(int __a) { __brkpt(); } #endif __DEVICE__ unsigned int __byte_perm(unsigned int __a, unsigned int __b, @@ -1483,8 +1476,8 @@ return r; } #endif // CUDA_VERSION >= 9020 -__DEVICE__ int abs(int __a) __NOEXCEPT { return __nv_abs(__a); } -__DEVICE__ double fabs(double __a) __NOEXCEPT { return __nv_fabs(__a); } +__DEVICE__ int abs(int __a) { return __nv_abs(__a); } +__DEVICE__ double fabs(double __a) { return __nv_fabs(__a); } __DEVICE__ double acos(double __a) { return __nv_acos(__a); } __DEVICE__ float acosf(float __a) { return __nv_acosf(__a); } __DEVICE__ double acosh(double __a) { return __nv_acosh(__a); } @@ -1581,15 +1574,15 @@ __DEVICE__ double jn(int __n, double __a) { return __nv_jn(__n, __a); } __DEVICE__ float jnf(int __n, float __a) { return __nv_jnf(__n, __a); } #if defined(__LP64__) || defined(_WIN64) -__DEVICE__ long labs(long __a) __NOEXCEPT { return __nv_llabs(__a); }; +__DEVICE__ long labs(long __a) { return __nv_llabs(__a); }; #else -__DEVICE__ long labs(long __a) __NOEXCEPT { return __nv_abs(__a); }; +__DEVICE__ long labs(long __a) { return __nv_abs(__a); }; #endif __DEVICE__ double ldexp(double __a, int __b) { return __nv_ldexp(__a, __b); } __DEVICE__ float ldexpf(float __a, int __b) { return __nv_ldexpf(__a, __b); } __DEVICE__ double lgamma(double __a) { return __nv_lgamma(__a); } __DEVICE__ float lgammaf(float __a) { return __nv_lgammaf(__a); } -__DEVICE__ long long llabs(long long __a) __NOEXCEPT { return __nv_llabs(__a); } +__DEVICE__ long long llabs(long long __a) { return __nv_llabs(__a); } __DEVICE__ long long llmax(long long __a, long long __b) { return __nv_llmax(__a, __b); } @@ -1719,8 +1712,6 @@ __DEVICE__ float rsqrtf(float __a) { return __nv_rsqrtf(__a); } __DEVICE__ double scalbn(double __a, int __b) { return __nv_scalbn(__a, __b); } __DEVICE__ float scalbnf(float __a, int __b) { return __nv_scalbnf(__a, __b); } -// TODO: remove once variant is supported -#ifndef _OPENMP __DEVICE__ double scalbln(double __a, long __b) { if (__b > INT_MAX) return __a > 0 ? HUGE_VAL : -HUGE_VAL; @@ -1735,7 +1726,6 @@ return __a > 0 ? 0.f : -0.f; return scalbnf(__a, (int)__b); } -#endif __DEVICE__ double sin(double __a) { return __nv_sin(__a); } __DEVICE__ void sincos(double __a, double *__s, double *__c) { return __nv_sincos(__a, __s, __c); @@ -1787,7 +1777,7 @@ __DEVICE__ double yn(int __a, double __b) { return __nv_yn(__a, __b); } __DEVICE__ float ynf(int __a, float __b) { return __nv_ynf(__a, __b); } -#undef __NOEXCEPT #pragma pop_macro("__DEVICE__") #pragma pop_macro("__FAST_OR_SLOW") + #endif // __CLANG_CUDA_DEVICE_FUNCTIONS_H__ diff --git a/clang/lib/Headers/__clang_cuda_math_forward_declares.h b/clang/lib/Headers/__clang_cuda_math_forward_declares.h --- a/clang/lib/Headers/__clang_cuda_math_forward_declares.h +++ b/clang/lib/Headers/__clang_cuda_math_forward_declares.h @@ -27,30 +27,8 @@ static __inline__ __attribute__((always_inline)) __attribute__((device)) #endif -// For C++ 17 we need to include noexcept attribute to be compatible -// with the header-defined version. This may be removed once -// variant is supported. -#if defined(_OPENMP) && defined(__cplusplus) && __cplusplus >= 201703L -#define __NOEXCEPT noexcept -#else -#define __NOEXCEPT -#endif - -#if !(defined(_OPENMP) && defined(__cplusplus)) __DEVICE__ long abs(long); __DEVICE__ long long abs(long long); -__DEVICE__ double abs(double); -__DEVICE__ float abs(float); -#endif -// While providing the CUDA declarations and definitions for math functions, -// we may manually define additional functions. -// TODO: Once variant is supported the additional functions will have -// to be removed. -#if defined(_OPENMP) && defined(__cplusplus) -__DEVICE__ const double abs(const double); -__DEVICE__ const float abs(const float); -#endif -__DEVICE__ int abs(int) __NOEXCEPT; __DEVICE__ double acos(double); __DEVICE__ float acos(float); __DEVICE__ double acosh(double); @@ -85,8 +63,8 @@ __DEVICE__ float exp(float); __DEVICE__ double expm1(double); __DEVICE__ float expm1(float); -__DEVICE__ double fabs(double) __NOEXCEPT; -__DEVICE__ float fabs(float) __NOEXCEPT; +__DEVICE__ double fabs(double); +__DEVICE__ float fabs(float); __DEVICE__ double fdim(double, double); __DEVICE__ float fdim(float, float); __DEVICE__ double floor(double); @@ -136,12 +114,12 @@ __DEVICE__ bool isnormal(float); __DEVICE__ bool isunordered(double, double); __DEVICE__ bool isunordered(float, float); -__DEVICE__ long labs(long) __NOEXCEPT; +__DEVICE__ long labs(long); __DEVICE__ double ldexp(double, int); __DEVICE__ float ldexp(float, int); __DEVICE__ double lgamma(double); __DEVICE__ float lgamma(float); -__DEVICE__ long long llabs(long long) __NOEXCEPT; +__DEVICE__ long long llabs(long long); __DEVICE__ long long llrint(double); __DEVICE__ long long llrint(float); __DEVICE__ double log10(double); @@ -152,9 +130,7 @@ __DEVICE__ float log2(float); __DEVICE__ double logb(double); __DEVICE__ float logb(float); -#if defined(_OPENMP) && defined(__cplusplus) __DEVICE__ long double log(long double); -#endif __DEVICE__ double log(double); __DEVICE__ float log(float); __DEVICE__ long lrint(double); @@ -264,6 +240,7 @@ using ::lgamma; using ::llabs; using ::llrint; +using ::llround; using ::log; using ::log10; using ::log1p; @@ -271,7 +248,6 @@ using ::logb; using ::lrint; using ::lround; -using ::llround; using ::modf; using ::nan; using ::nanf; @@ -302,7 +278,6 @@ } // namespace std #endif -#undef __NOEXCEPT #pragma pop_macro("__DEVICE__") #endif diff --git a/clang/lib/Headers/openmp_wrappers/__clang_openmp_math.h b/clang/lib/Headers/openmp_wrappers/__clang_openmp_math.h --- a/clang/lib/Headers/openmp_wrappers/__clang_openmp_math.h +++ b/clang/lib/Headers/openmp_wrappers/__clang_openmp_math.h @@ -8,17 +8,6 @@ */ #if defined(__NVPTX__) && defined(_OPENMP) -/// TODO: -/// We are currently reusing the functionality of the Clang-CUDA code path -/// as an alternative to the host declarations provided by math.h and cmath. -/// This is suboptimal. -/// -/// We should instead declare the device functions in a similar way, e.g., -/// through OpenMP 5.0 variants, and afterwards populate the module with the -/// host declarations by unconditionally including the host math.h or cmath, -/// respectively. This is actually what the Clang-CUDA code path does, using -/// __device__ instead of variants to avoid redeclarations and get the desired -/// overload resolution. #define __CUDA__ @@ -28,8 +17,5 @@ #undef __CUDA__ -/// Magic macro for stopping the math.h/cmath host header from being included. -#define __CLANG_NO_HOST_MATH__ - #endif diff --git a/clang/lib/Headers/openmp_wrappers/__clang_openmp_math_declares.h b/clang/lib/Headers/openmp_wrappers/__clang_openmp_math_declares.h --- a/clang/lib/Headers/openmp_wrappers/__clang_openmp_math_declares.h +++ b/clang/lib/Headers/openmp_wrappers/__clang_openmp_math_declares.h @@ -19,7 +19,12 @@ #define __CUDA__ #if defined(__cplusplus) - #include <__clang_cuda_math_forward_declares.h> +#include <__clang_cuda_math_forward_declares.h> +#include +#include +#else +#include +#include #endif /// Include declarations for libdevice functions. diff --git a/clang/lib/Headers/openmp_wrappers/cmath b/clang/lib/Headers/openmp_wrappers/cmath --- a/clang/lib/Headers/openmp_wrappers/cmath +++ b/clang/lib/Headers/openmp_wrappers/cmath @@ -7,10 +7,10 @@ *===-----------------------------------------------------------------------=== */ -#include <__clang_openmp_math.h> - -#ifndef __CLANG_NO_HOST_MATH__ +#pragma omp begin declare variant match(device = {kind(host)}) #include_next -#else -#undef __CLANG_NO_HOST_MATH__ -#endif +#pragma omp end declare variant + +#pragma omp begin declare variant match(device = {kind(gpu)}) +#include <__clang_openmp_math.h> +#pragma omp end declare variant diff --git a/clang/lib/Headers/openmp_wrappers/math.h b/clang/lib/Headers/openmp_wrappers/math.h --- a/clang/lib/Headers/openmp_wrappers/math.h +++ b/clang/lib/Headers/openmp_wrappers/math.h @@ -7,11 +7,10 @@ *===-----------------------------------------------------------------------=== */ -#include <__clang_openmp_math.h> - -#ifndef __CLANG_NO_HOST_MATH__ +#pragma omp begin declare variant match(device = {kind(host)}) #include_next -#else -#undef __CLANG_NO_HOST_MATH__ -#endif +#pragma omp end declare variant +#pragma omp begin declare variant match(device = {kind(gpu)}) +#include <__clang_openmp_math.h> +#pragma omp end declare variant diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -45,6 +45,8 @@ OMPD_target_teams_distribute_parallel, OMPD_mapper, OMPD_variant, + OMPD_begin, + OMPD_begin_declare, }; // Helper to unify the enum class OpenMPDirectiveKind with its extension @@ -98,6 +100,7 @@ .Case("update", OMPD_update) .Case("mapper", OMPD_mapper) .Case("variant", OMPD_variant) + .Case("begin", OMPD_begin) .Default(OMPD_unknown); } @@ -106,18 +109,21 @@ // E.g.: OMPD_for OMPD_simd ===> OMPD_for_simd // TODO: add other combined directives in topological order. static const OpenMPDirectiveKindExWrapper F[][3] = { + {OMPD_begin, OMPD_declare, OMPD_begin_declare}, + {OMPD_end, OMPD_declare, OMPD_end_declare}, {OMPD_cancellation, OMPD_point, OMPD_cancellation_point}, {OMPD_declare, OMPD_reduction, OMPD_declare_reduction}, {OMPD_declare, OMPD_mapper, OMPD_declare_mapper}, {OMPD_declare, OMPD_simd, OMPD_declare_simd}, {OMPD_declare, OMPD_target, OMPD_declare_target}, {OMPD_declare, OMPD_variant, OMPD_declare_variant}, + {OMPD_begin_declare, OMPD_variant, OMPD_begin_declare_variant}, + {OMPD_end_declare, OMPD_variant, OMPD_end_declare_variant}, {OMPD_distribute, OMPD_parallel, OMPD_distribute_parallel}, {OMPD_distribute_parallel, OMPD_for, OMPD_distribute_parallel_for}, {OMPD_distribute_parallel_for, OMPD_simd, OMPD_distribute_parallel_for_simd}, {OMPD_distribute, OMPD_simd, OMPD_distribute_simd}, - {OMPD_end, OMPD_declare, OMPD_end_declare}, {OMPD_end_declare, OMPD_target, OMPD_end_declare_target}, {OMPD_target, OMPD_data, OMPD_target_data}, {OMPD_target, OMPD_enter, OMPD_target_enter}, @@ -1062,37 +1068,8 @@ return false; } -/// Parse clauses for '#pragma omp declare variant ( variant-func-id ) clause'. -void Parser::ParseOMPDeclareVariantClauses(Parser::DeclGroupPtrTy Ptr, - CachedTokens &Toks, - SourceLocation Loc) { - PP.EnterToken(Tok, /*IsReinject*/ true); - PP.EnterTokenStream(Toks, /*DisableMacroExpansion=*/true, - /*IsReinject*/ true); - // Consume the previously pushed token. - ConsumeAnyToken(/*ConsumeCodeCompletionTok=*/true); - ConsumeAnyToken(/*ConsumeCodeCompletionTok=*/true); - - FNContextRAII FnContext(*this, Ptr); - // Parse function declaration id. - SourceLocation RLoc; - // Parse with IsAddressOfOperand set to true to parse methods as DeclRefExprs - // instead of MemberExprs. - ExprResult AssociatedFunction = - ParseOpenMPParensExpr(getOpenMPDirectiveName(OMPD_declare_variant), RLoc, - /*IsAddressOfOperand=*/true); - if (!AssociatedFunction.isUsable()) { - if (!Tok.is(tok::annot_pragma_openmp_end)) - while (!SkipUntil(tok::annot_pragma_openmp_end, StopBeforeMatch)) - ; - // Skip the last annot_pragma_openmp_end. - (void)ConsumeAnnotationToken(); - return; - } - Optional> DeclVarData = - Actions.checkOpenMPDeclareVariantFunction( - Ptr, AssociatedFunction.get(), SourceRange(Loc, Tok.getLocation())); - +void Parser::ParseOMPDeclareVariantMatchClause( + SourceLocation Loc, SmallVectorImpl &Data) { // Parse 'match'. OpenMPClauseKind CKind = Tok.isAnnotation() ? OMPC_unknown @@ -1119,7 +1096,6 @@ } // Parse inner context selectors. - SmallVector Data; if (!parseOpenMPContextSelectors(Loc, Data)) { // Parse ')'. (void)T.consumeClose(); @@ -1129,6 +1105,41 @@ << getOpenMPDirectiveName(OMPD_declare_variant); } } +} + +/// Parse clauses for '#pragma omp declare variant ( variant-func-id ) clause'. +void Parser::ParseOMPDeclareVariantClauses(Parser::DeclGroupPtrTy Ptr, + CachedTokens &Toks, + SourceLocation Loc) { + PP.EnterToken(Tok, /*IsReinject*/ true); + PP.EnterTokenStream(Toks, /*DisableMacroExpansion=*/true, + /*IsReinject*/ true); + // Consume the previously pushed token. + ConsumeAnyToken(/*ConsumeCodeCompletionTok=*/true); + ConsumeAnyToken(/*ConsumeCodeCompletionTok=*/true); + + FNContextRAII FnContext(*this, Ptr); + // Parse function declaration id. + SourceLocation RLoc; + // Parse with IsAddressOfOperand set to true to parse methods as DeclRefExprs + // instead of MemberExprs. + ExprResult AssociatedFunction = + ParseOpenMPParensExpr(getOpenMPDirectiveName(OMPD_declare_variant), RLoc, + /*IsAddressOfOperand=*/true); + if (!AssociatedFunction.isUsable()) { + if (!Tok.is(tok::annot_pragma_openmp_end)) + while (!SkipUntil(tok::annot_pragma_openmp_end, StopBeforeMatch)) + ; + // Skip the last annot_pragma_openmp_end. + (void)ConsumeAnnotationToken(); + return; + } + Optional> DeclVarData = + Actions.checkOpenMPDeclareVariantFunction( + Ptr, AssociatedFunction.get(), SourceRange(Loc, Tok.getLocation())); + + SmallVector Data; + ParseOMPDeclareVariantMatchClause(Loc, Data); // Skip last tokens. while (Tok.isNot(tok::annot_pragma_openmp_end)) @@ -1462,6 +1473,46 @@ } break; } + case OMPD_begin_declare_variant: { + // The syntax is: + // { #pragma omp begin declare variant clause } + // + // { #pragma omp end declare variant } + // + ConsumeToken(); + + SmallVector Data; + ParseOMPDeclareVariantMatchClause(Loc, Data); + + // Skip last tokens. + while (Tok.isNot(tok::annot_pragma_openmp_end)) + ConsumeAnyToken(); + + bool Elide = Actions.ActOnOpenMPDeclareVariantDirective( + nullptr, nullptr, SourceRange(Loc, Tok.getLocation()), Data); + if (!Elide) + break; + + // Elide all the code till the matching end declare variant was found. + unsigned Nesting = 1; + do { + ConsumeAnyToken(); + OpenMPDirectiveKind DK = parseOpenMPDirectiveKind(*this); + if (DK == OMPD_end_declare_variant) + --Nesting; + if (DK == OMPD_begin_declare_variant) + ++Nesting; + } while (Nesting); + + LLVM_FALLTHROUGH; + } + case OMPD_end_declare_variant: + assert(getActions().DeclareVariantScopeAttr && + "TODO error for unmatched end declare variant"); + // TODO: verify DeclareVariantScopeAttr is null after parsing + // TODO: Make this a call in the SEMA + getActions().DeclareVariantScopeAttr = nullptr; + break; case OMPD_declare_variant: case OMPD_declare_simd: { // The syntax is: @@ -1948,6 +1999,8 @@ case OMPD_end_declare_target: case OMPD_requires: case OMPD_declare_variant: + case OMPD_begin_declare_variant: + case OMPD_end_declare_variant: Diag(Tok, diag::err_omp_unexpected_directive) << 1 << getOpenMPDirectiveName(DKind); SkipUntil(tok::annot_pragma_openmp_end); diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -2349,8 +2349,7 @@ if (!isa(Old)) return; - Diag(New->getLocation(), diag::err_redefinition) - << New->getDeclName(); + Diag(New->getLocation(), diag::err_redefinition) << New->getDeclName(); notePreviousDefinition(Old, New->getLocation()); return New->setInvalidDecl(); } @@ -8654,6 +8653,14 @@ isVirtualOkay); if (!NewFD) return nullptr; + if (getLangOpts().OpenMP && DeclareVariantScopeAttr) { + OMPDeclareVariantAttr *DeclVarAttr = + DeclareVariantScopeAttr->clone(getASTContext()); + DeclVarAttr->setInherited(true); + NewFD->addAttr(DeclVarAttr); + NewFD->setIsMultiVersion(); + } + if (OriginalLexicalContext && OriginalLexicalContext->isObjCContainer()) NewFD->setTopLevelDeclInObjCContainer(); @@ -9784,6 +9791,10 @@ if (MVType != MultiVersionKind::Target) return true; break; + case attr::OMPDeclareVariant: + if (MVType != MultiVersionKind::OMPVariant) + return true; + break; default: return true; } @@ -9797,7 +9808,8 @@ const PartialDiagnosticAt &NoteCausedDiagIDAt, const PartialDiagnosticAt &NoSupportDiagIDAt, const PartialDiagnosticAt &DiffDiagIDAt, bool TemplatesSupported, - bool ConstexprSupported, bool CLinkageMayDiffer) { + bool ConstexprSupported, bool CLinkageMayDiffer, bool StorageClassMayDiffer, + bool ConstexprSpecMayDiffer, bool InlineSpecificationMayDiffer) { enum DoesntSupport { FuncTemplates = 0, VirtFuncs = 1, @@ -9860,7 +9872,7 @@ QualType NewQType = Context.getCanonicalType(NewFD->getType()); const auto *NewType = cast(NewQType); - QualType NewReturnType = NewType->getReturnType(); + QualType NewReturnType = NewType->getReturnType().getUnqualifiedType(); if (NewReturnType->isUndeducedType()) return Diag(NoSupportDiagIDAt.first, NoSupportDiagIDAt.second) @@ -9876,18 +9888,21 @@ if (OldTypeInfo.getCC() != NewTypeInfo.getCC()) return Diag(DiffDiagIDAt.first, DiffDiagIDAt.second) << CallingConv; - QualType OldReturnType = OldType->getReturnType(); + QualType OldReturnType = OldType->getReturnType().getUnqualifiedType(); if (OldReturnType != NewReturnType) return Diag(DiffDiagIDAt.first, DiffDiagIDAt.second) << ReturnType; - if (OldFD->getConstexprKind() != NewFD->getConstexprKind()) + if (!ConstexprSpecMayDiffer && + OldFD->getConstexprKind() != NewFD->getConstexprKind()) return Diag(DiffDiagIDAt.first, DiffDiagIDAt.second) << ConstexprSpec; - if (OldFD->isInlineSpecified() != NewFD->isInlineSpecified()) + if (!InlineSpecificationMayDiffer && + OldFD->isInlineSpecified() != NewFD->isInlineSpecified()) return Diag(DiffDiagIDAt.first, DiffDiagIDAt.second) << InlineSpec; - if (OldFD->getStorageClass() != NewFD->getStorageClass()) + if (!StorageClassMayDiffer && + OldFD->getStorageClass() != NewFD->getStorageClass()) return Diag(DiffDiagIDAt.first, DiffDiagIDAt.second) << StorageClass; if (!CLinkageMayDiffer && OldFD->isExternC() != NewFD->isExternC()) @@ -9905,7 +9920,9 @@ const FunctionDecl *NewFD, bool CausesMV, MultiVersionKind MVType) { - if (!S.getASTContext().getTargetInfo().supportsMultiVersioning()) { + bool IsOpenMPVariant = MVType == MultiVersionKind::OMPVariant; + if (!IsOpenMPVariant && + !S.getASTContext().getTargetInfo().supportsMultiVersioning()) { S.Diag(NewFD->getLocation(), diag::err_multiversion_not_supported); if (OldFD) S.Diag(OldFD->getLocation(), diag::note_previous_declaration); @@ -9918,19 +9935,20 @@ // For now, disallow all other attributes. These should be opt-in, but // an analysis of all of them is a future FIXME. - if (CausesMV && OldFD && HasNonMultiVersionAttributes(OldFD, MVType)) { + if (CausesMV && OldFD && !IsOpenMPVariant && + HasNonMultiVersionAttributes(OldFD, MVType)) { S.Diag(OldFD->getLocation(), diag::err_multiversion_no_other_attrs) << IsCPUSpecificCPUDispatchMVType; S.Diag(NewFD->getLocation(), diag::note_multiversioning_caused_here); return true; } - if (HasNonMultiVersionAttributes(NewFD, MVType)) + if (!IsOpenMPVariant && HasNonMultiVersionAttributes(NewFD, MVType)) return S.Diag(NewFD->getLocation(), diag::err_multiversion_no_other_attrs) << IsCPUSpecificCPUDispatchMVType; // Only allow transition to MultiVersion if it hasn't been used. - if (OldFD && CausesMV && OldFD->isUsed(false)) + if (OldFD && CausesMV && !IsOpenMPVariant && OldFD->isUsed(false)) return S.Diag(NewFD->getLocation(), diag::err_multiversion_after_used); return S.areMultiversionVariantFunctionsCompatible( @@ -9942,9 +9960,12 @@ << IsCPUSpecificCPUDispatchMVType), PartialDiagnosticAt(NewFD->getLocation(), S.PDiag(diag::err_multiversion_diff)), - /*TemplatesSupported=*/false, + /*TemplatesSupported=*/IsOpenMPVariant, /*ConstexprSupported=*/!IsCPUSpecificCPUDispatchMVType, - /*CLinkageMayDiffer=*/false); + /*CLinkageMayDiffer=*/IsOpenMPVariant, + /*StorageClassMayDiffer=*/IsOpenMPVariant, + /*ConstexprSpecMayDiffer=*/IsOpenMPVariant, + /*InlineSpecificationMayDiffer=*/IsOpenMPVariant); } /// Check the validity of a multiversion function declaration that is the @@ -9955,7 +9976,8 @@ /// Returns true if there was an error, false otherwise. static bool CheckMultiVersionFirstFunction(Sema &S, FunctionDecl *FD, MultiVersionKind MVType, - const TargetAttr *TA) { + const TargetAttr *TA, + NamedDecl *OldDecl) { assert(MVType != MultiVersionKind::None && "Function lacks multiversion attribute"); @@ -10074,8 +10096,8 @@ Sema &S, FunctionDecl *OldFD, FunctionDecl *NewFD, MultiVersionKind NewMVType, const TargetAttr *NewTA, const CPUDispatchAttr *NewCPUDisp, const CPUSpecificAttr *NewCPUSpec, - bool &Redeclaration, NamedDecl *&OldDecl, bool &MergeTypeWithPrevious, - LookupResult &Previous) { + const OMPDeclareVariantAttr *NewOpenMPVariant, bool &Redeclaration, + NamedDecl *&OldDecl, bool &MergeTypeWithPrevious, LookupResult &Previous) { MultiVersionKind OldMVType = OldFD->getMultiVersionKind(); // Disallow mixing of multiversioning types. @@ -10089,6 +10111,18 @@ return true; } + if (OldMVType == MultiVersionKind::OMPVariant && + NewMVType == MultiVersionKind::None) { + assert(!NewOpenMPVariant && "Didn't expect variant attr!"); + auto *OldOMPVariant = OldFD->getAttr(); + auto *NewOMPVariant = OldOMPVariant->clone(S.getASTContext()); + NewOMPVariant->setInherited(true); + NewFD->addAttr(NewOMPVariant); + NewFD->setIsMultiVersion(); + NewOpenMPVariant = NewOMPVariant; + NewMVType = MultiVersionKind::OMPVariant; + } + ParsedTargetAttr NewParsed; if (NewTA) { NewParsed = NewTA->parse(); @@ -10123,6 +10157,14 @@ NewFD->setInvalidDecl(); return true; } + } else if (NewMVType == MultiVersionKind::OMPVariant) { + auto *CurOMPVariant = CurFD->getAttr(); + if (!CurOMPVariant) { + CurOMPVariant = NewOpenMPVariant->clone(S.getASTContext()); + CurOMPVariant->setInherited(true); + CurFD->addAttr(CurOMPVariant); + CurFD->setIsMultiVersion(); + } } else { const auto *CurCPUSpec = CurFD->getAttr(); const auto *CurCPUDisp = CurFD->getAttr(); @@ -10215,7 +10257,6 @@ return false; } - /// Check the validity of a mulitversion function declaration. /// Also sets the multiversion'ness' of the function itself. /// @@ -10229,10 +10270,12 @@ const auto *NewTA = NewFD->getAttr(); const auto *NewCPUDisp = NewFD->getAttr(); const auto *NewCPUSpec = NewFD->getAttr(); + const auto *NewOpenMPVariant = NewFD->getAttr(); + unsigned NumMV = bool(NewTA) + bool(NewCPUDisp) + bool(NewCPUSpec) + + bool(NewOpenMPVariant); // Mixing Multiversioning types is prohibited. - if ((NewTA && NewCPUDisp) || (NewTA && NewCPUSpec) || - (NewCPUDisp && NewCPUSpec)) { + if (NumMV > 1) { S.Diag(NewFD->getLocation(), diag::err_multiversion_types_mixed); NewFD->setInvalidDecl(); return true; @@ -10253,14 +10296,18 @@ return false; } + if (auto *USD = dyn_cast_or_null(OldDecl)) + OldDecl = USD->getTargetDecl(); + if (!OldDecl || !OldDecl->getAsFunction() || - OldDecl->getDeclContext()->getRedeclContext() != - NewFD->getDeclContext()->getRedeclContext()) { + (OldDecl->getDeclContext()->getRedeclContext() != + NewFD->getDeclContext()->getRedeclContext() && + !OldDecl->getAsFunction()->isOpenMPMultiVersion())) { // If there's no previous declaration, AND this isn't attempting to cause // multiversioning, this isn't an error condition. if (MVType == MultiVersionKind::None) return false; - return CheckMultiVersionFirstFunction(S, NewFD, MVType, NewTA); + return CheckMultiVersionFirstFunction(S, NewFD, MVType, NewTA, OldDecl); } FunctionDecl *OldFD = OldDecl->getAsFunction(); @@ -10268,7 +10315,8 @@ if (!OldFD->isMultiVersion() && MVType == MultiVersionKind::None) return false; - if (OldFD->isMultiVersion() && MVType == MultiVersionKind::None) { + if (OldFD->isMultiVersion() && MVType == MultiVersionKind::None && + !OldFD->isOpenMPMultiVersion()) { S.Diag(NewFD->getLocation(), diag::err_multiversion_required_in_redecl) << (OldFD->getMultiVersionKind() != MultiVersionKind::Target); NewFD->setInvalidDecl(); @@ -10285,8 +10333,8 @@ // appropriate attribute in the current function decl. Resolve that these are // still compatible with previous declarations. return CheckMultiVersionAdditionalDecl( - S, OldFD, NewFD, MVType, NewTA, NewCPUDisp, NewCPUSpec, Redeclaration, - OldDecl, MergeTypeWithPrevious, Previous); + S, OldFD, NewFD, MVType, NewTA, NewCPUDisp, NewCPUSpec, NewOpenMPVariant, + Redeclaration, OldDecl, MergeTypeWithPrevious, Previous); } /// Perform semantic checking of a new function declaration. @@ -10315,8 +10363,8 @@ // Determine whether the type of this function should be merged with // a previous visible declaration. This never happens for functions in C++, // and always happens in C if the previous declaration was visible. - bool MergeTypeWithPrevious = !getLangOpts().CPlusPlus && - !Previous.isShadowed(); + bool MergeTypeWithPrevious = + !getLangOpts().CPlusPlus && !Previous.isShadowed(); bool Redeclaration = false; NamedDecl *OldDecl = nullptr; @@ -13580,6 +13628,16 @@ else FD = cast(D); + if (getLangOpts().OpenMP && DeclareVariantScopeAttr) { + OMPDeclareVariantAttr *DeclVarAttr = FD->getAttr(); + if (!DeclVarAttr) { + DeclVarAttr = DeclareVariantScopeAttr->clone(getASTContext()); + FD->addAttr(DeclVarAttr); + } + DeclVarAttr->setInherited(false); + FD->setIsMultiVersion(); + } + // Do not push if it is a lambda because one is already pushed when building // the lambda in ActOnStartOfLambdaDefinition(). if (!isLambdaCallOperator(FD)) diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -15587,7 +15587,6 @@ // of instantiation). This gives better backtraces in diagnostics. PointOfInstantiation = Loc; } - if (FirstInstantiation || TSK != TSK_ImplicitInstantiation || Func->isConstexpr()) { if (isa(Func->getDeclContext()) && diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -14,6 +14,7 @@ #include "TreeTransform.h" #include "clang/AST/ASTContext.h" #include "clang/AST/ASTMutationListener.h" +#include "clang/AST/Attr.h" #include "clang/AST/CXXInheritance.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" @@ -28,8 +29,10 @@ #include "clang/Sema/Scope.h" #include "clang/Sema/ScopeInfo.h" #include "clang/Sema/SemaInternal.h" +#include "clang/Sema/Template.h" #include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/PointerEmbeddedInt.h" +#include "llvm/ADT/SetOperations.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" using namespace clang; using namespace llvm::omp; @@ -5224,11 +5227,8 @@ Diag(SR.getBegin(), diag::warn_omp_declare_variant_after_emitted) << FD->getLocation(); - // The VariantRef must point to function. - if (!VariantRef) { - Diag(SR.getBegin(), diag::err_omp_function_expected) << VariantId; - return None; - } + if (!VariantRef) + return std::make_pair(FD, VariantRef); // Do not check templates, wait until instantiation. if (VariantRef->isTypeDependent() || VariantRef->isValueDependent() || @@ -5324,6 +5324,7 @@ return None; } + // TODO check these for missing VariantRef as well enum DoesntSupport { VirtFuncs = 1, Constructors = 3, @@ -5384,82 +5385,14 @@ PDiag(diag::err_omp_declare_variant_diff) << FD->getLocation()), /*TemplatesSupported=*/true, /*ConstexprSupported=*/false, - /*CLinkageMayDiffer=*/true)) + /*CLinkageMayDiffer=*/true, + /*StorageClassMayDiffer=*/true, + /*ConstexprSpecMayDiffer=*/true, + /*InlineSpecificationMayDiffer=*/true)) return None; return std::make_pair(FD, cast(DRE)); } -void Sema::ActOnOpenMPDeclareVariantDirective( - FunctionDecl *FD, Expr *VariantRef, SourceRange SR, - ArrayRef Data) { - if (Data.empty()) - return; - SmallVector CtxScores; - SmallVector CtxSets; - SmallVector Ctxs; - SmallVector ImplVendors, DeviceKinds; - bool IsError = false; - for (const OMPCtxSelectorData &D : Data) { - OpenMPContextSelectorSetKind CtxSet = D.CtxSet; - OpenMPContextSelectorKind Ctx = D.Ctx; - if (CtxSet == OMP_CTX_SET_unknown || Ctx == OMP_CTX_unknown) - return; - Expr *Score = nullptr; - if (D.Score.isUsable()) { - Score = D.Score.get(); - if (!Score->isTypeDependent() && !Score->isValueDependent() && - !Score->isInstantiationDependent() && - !Score->containsUnexpandedParameterPack()) { - Score = - PerformOpenMPImplicitIntegerConversion(Score->getExprLoc(), Score) - .get(); - if (Score) - Score = VerifyIntegerConstantExpression(Score).get(); - } - } else { - // OpenMP 5.0, 2.3.3 Matching and Scoring Context Selectors. - // The kind, arch, and isa selectors are given the values 2^l, 2^(l+1) and - // 2^(l+2), respectively, where l is the number of traits in the construct - // set. - // TODO: implement correct logic for isa and arch traits. - // TODO: take the construct context set into account when it is - // implemented. - int L = 0; // Currently set the number of traits in construct set to 0, - // since the construct trait set in not supported yet. - if (CtxSet == OMP_CTX_SET_device && Ctx == OMP_CTX_kind) - Score = ActOnIntegerConstant(SourceLocation(), std::pow(2, L)).get(); - else - Score = ActOnIntegerConstant(SourceLocation(), 0).get(); - } - switch (Ctx) { - case OMP_CTX_vendor: - assert(CtxSet == OMP_CTX_SET_implementation && - "Expected implementation context selector set."); - ImplVendors.append(D.Names.begin(), D.Names.end()); - break; - case OMP_CTX_kind: - assert(CtxSet == OMP_CTX_SET_device && - "Expected device context selector set."); - DeviceKinds.append(D.Names.begin(), D.Names.end()); - break; - case OMP_CTX_unknown: - llvm_unreachable("Unknown context selector kind."); - } - IsError = IsError || !Score; - CtxSets.push_back(CtxSet); - Ctxs.push_back(Ctx); - CtxScores.push_back(Score); - } - if (!IsError) { - auto *NewAttr = OMPDeclareVariantAttr::CreateImplicit( - Context, VariantRef, CtxScores.begin(), CtxScores.size(), - CtxSets.begin(), CtxSets.size(), Ctxs.begin(), Ctxs.size(), - ImplVendors.begin(), ImplVendors.size(), DeviceKinds.begin(), - DeviceKinds.size(), SR); - FD->addAttr(NewAttr); - } -} - StmtResult Sema::ActOnOpenMPParallelDirective(ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, @@ -17022,3 +16955,382 @@ return OMPAllocateClause::Create(Context, StartLoc, LParenLoc, Allocator, ColonLoc, EndLoc, Vars); } + +template +static void copyAttrIfPresent(Sema &S, FunctionDecl *FD, + const FunctionDecl &TemplateFD) { + if (!FD->hasAttr()) + if (AttrTy *Attribute = TemplateFD.getAttr()) { + AttrTy *Clone = Attribute->clone(S.Context); + Clone->setInherited(true); + FD->addAttr(Clone); + } +} + +void Sema::inheritOpenMPVariantAttrs(FunctionDecl *FD, + const FunctionTemplateDecl &TD) { + const FunctionDecl &TemplateFD = *TD.getTemplatedDecl(); + copyAttrIfPresent(*this, FD, TemplateFD); +} + +// TODO: We have various representations for the same data, it might help to +// reuse some instead of converting them. +// TODO: It is unclear where this checking code should live. It is used all over +// the place and would probably fit bet in OMPDeclareVariantAttr. +using OMPContextSelectorData = + OpenMPCtxSelectorData, llvm::APSInt>; +using CompleteOMPContextSelectorData = SmallVector; + +/// Checks current context and returns true if it matches the context selector. +template +static bool checkContext(const OMPContextSelectorData &Data, + Arguments... Params) { + assert(Data.CtxSet != OMP_CTX_SET_unknown && Data.Ctx != OMP_CTX_unknown && + "Unknown context selector or context selector set."); + return false; +} + +/// Checks for implementation={vendor()} context selector. +/// \returns true iff ="llvm", false otherwise. +template <> +bool checkContext( + const OMPContextSelectorData &Data) { + return llvm::all_of(Data.Names, + [](StringRef S) { return !S.compare_lower("llvm"); }); +} + +/// Checks for device={kind()} context selector. +/// \returns true if ="host" and compilation is for host. +/// true if ="nohost" and compilation is for device. +/// true if ="cpu" and compilation is for Arm, X86 or PPC CPU. +/// true if ="gpu" and compilation is for NVPTX or AMDGCN. +/// false otherwise. +template <> +bool checkContext(const OMPContextSelectorData &Data, + const LangOptions &LO, + const TargetInfo &TI) { + for (StringRef Name : Data.Names) { + if (!Name.compare_lower("host")) { + if (LO.OpenMPIsDevice) + return false; + continue; + } + if (!Name.compare_lower("nohost")) { + if (!LO.OpenMPIsDevice) + return false; + continue; + } + switch (TI.getTriple().getArch()) { + case llvm::Triple::arm: + case llvm::Triple::armeb: + case llvm::Triple::aarch64: + case llvm::Triple::aarch64_be: + case llvm::Triple::aarch64_32: + case llvm::Triple::ppc: + case llvm::Triple::ppc64: + case llvm::Triple::ppc64le: + case llvm::Triple::x86: + case llvm::Triple::x86_64: + if (Name.compare_lower("cpu")) + return false; + break; + case llvm::Triple::amdgcn: + case llvm::Triple::nvptx: + case llvm::Triple::nvptx64: + if (Name.compare_lower("gpu")) + return false; + break; + case llvm::Triple::UnknownArch: + case llvm::Triple::arc: + case llvm::Triple::avr: + case llvm::Triple::bpfel: + case llvm::Triple::bpfeb: + case llvm::Triple::hexagon: + case llvm::Triple::mips: + case llvm::Triple::mipsel: + case llvm::Triple::mips64: + case llvm::Triple::mips64el: + case llvm::Triple::msp430: + case llvm::Triple::r600: + case llvm::Triple::riscv32: + case llvm::Triple::riscv64: + case llvm::Triple::sparc: + case llvm::Triple::sparcv9: + case llvm::Triple::sparcel: + case llvm::Triple::systemz: + case llvm::Triple::tce: + case llvm::Triple::tcele: + case llvm::Triple::thumb: + case llvm::Triple::thumbeb: + case llvm::Triple::xcore: + case llvm::Triple::le32: + case llvm::Triple::le64: + case llvm::Triple::amdil: + case llvm::Triple::amdil64: + case llvm::Triple::hsail: + case llvm::Triple::hsail64: + case llvm::Triple::spir: + case llvm::Triple::spir64: + case llvm::Triple::kalimba: + case llvm::Triple::shave: + case llvm::Triple::lanai: + case llvm::Triple::wasm32: + case llvm::Triple::wasm64: + case llvm::Triple::renderscript32: + case llvm::Triple::renderscript64: + return false; + } + } + return true; +} + +static llvm::APSInt evaluateScoreExpr(Expr *Score, Sema &S, + CompleteOMPContextSelectorData &Data, + FunctionDecl *FD) { + if (FD && FD->getTemplateSpecializationArgs()) { + MultiLevelTemplateArgumentList MLTAL(*FD->getTemplateSpecializationArgs()); + EnterExpressionEvaluationContext Unevaluated( + S, Sema::ExpressionEvaluationContext::ConstantEvaluated); + ExprResult Result = S.SubstExpr(Score, MLTAL); + assert(!Result.isInvalid() && "Expected successful substitution."); + Score = Result.getAs(); + } + return Score->EvaluateKnownConstInt(S.getASTContext()); +} + +static CompleteOMPContextSelectorData +translateAttrToContextSelectorData(Sema &S, const OMPDeclareVariantAttr *A, + FunctionDecl *FD) { + CompleteOMPContextSelectorData Data; + if (!A) + return Data; + for (unsigned I = 0, E = A->scores_size(); I < E; ++I) { + Data.emplace_back(); + auto CtxSet = static_cast( + *std::next(A->ctxSelectorSets_begin(), I)); + auto Ctx = static_cast( + *std::next(A->ctxSelectors_begin(), I)); + Data.back().CtxSet = CtxSet; + Data.back().Ctx = Ctx; + Expr *Score = *std::next(A->scores_begin(), I); + Data.back().Score = evaluateScoreExpr(Score, S, Data, FD); + switch (Ctx) { + case OMP_CTX_vendor: + assert(CtxSet == OMP_CTX_SET_implementation && + "Expected implementation context selector set."); + Data.back().Names = + llvm::makeArrayRef(A->implVendors_begin(), A->implVendors_end()); + break; + case OMP_CTX_kind: + assert(CtxSet == OMP_CTX_SET_device && + "Expected device context selector set."); + Data.back().Names = + llvm::makeArrayRef(A->deviceKinds_begin(), A->deviceKinds_end()); + break; + case OMP_CTX_unknown: + llvm_unreachable("Unknown context selector kind."); + } + } + return Data; +} + +static bool +matchesOpenMPContextImpl(const CompleteOMPContextSelectorData &ContextData, + const LangOptions &LO, const TargetInfo &TI) { + for (const OMPContextSelectorData &Data : ContextData) { + switch (Data.Ctx) { + case OMP_CTX_vendor: + assert(Data.CtxSet == OMP_CTX_SET_implementation && + "Expected implementation context selector set."); + if (!checkContext(Data)) + return false; + break; + case OMP_CTX_kind: + assert(Data.CtxSet == OMP_CTX_SET_device && + "Expected device context selector set."); + if (!checkContext(Data, LO, TI)) + return false; + break; + case OMP_CTX_unknown: + llvm_unreachable("Unknown context selector kind."); + } + } + return true; +} + +static bool isStrictSubset(const CompleteOMPContextSelectorData &LHS, + const CompleteOMPContextSelectorData &RHS) { + llvm::SmallDenseMap, llvm::StringSet<>, 4> RHSData; + for (const OMPContextSelectorData &D : RHS) { + auto &Pair = RHSData.FindAndConstruct(std::make_pair(D.CtxSet, D.Ctx)); + Pair.getSecond().insert(D.Names.begin(), D.Names.end()); + } + bool AllSetsAreEqual = true; + for (const OMPContextSelectorData &D : LHS) { + auto It = RHSData.find(std::make_pair(D.CtxSet, D.Ctx)); + if (It == RHSData.end()) + return false; + if (D.Names.size() > It->getSecond().size()) + return false; + if (llvm::set_union(It->getSecond(), D.Names)) + return false; + AllSetsAreEqual = + AllSetsAreEqual && (D.Names.size() == It->getSecond().size()); + } + + return LHS.size() != RHS.size() || !AllSetsAreEqual; +} + +const OMPDeclareVariantAttr * +Sema::getBetterOpenMPContextMatch(const OMPDeclareVariantAttr *LHSAttr, + const OMPDeclareVariantAttr *RHSAttr, + FunctionDecl *LHSFD, FunctionDecl *RHSFD) { + ASTContext &C = getASTContext(); + const CompleteOMPContextSelectorData LHS = + translateAttrToContextSelectorData(*this, LHSAttr, LHSFD); + const CompleteOMPContextSelectorData RHS = + translateAttrToContextSelectorData(*this, RHSAttr, RHSFD); + bool LHSMatch = LHSAttr && matchesOpenMPContextImpl(LHS, C.getLangOpts(), + C.getTargetInfo()); + bool RHSMatch = RHSAttr && matchesOpenMPContextImpl(RHS, C.getLangOpts(), + C.getTargetInfo()); + bool LHSisOK = LHSMatch && !LHSAttr->isInherited(); + bool RHSisOK = RHSMatch && !RHSAttr->isInherited(); + if (!LHSisOK && !RHSisOK) + return nullptr; + if (LHSisOK && !RHSisOK) + return LHSAttr; + if (!LHSisOK && RHSisOK) + return RHSAttr; + assert(LHSisOK && RHSisOK && "broken invariant"); + + // Score is calculated as sum of all scores + 1. + llvm::APSInt LHSScore(llvm::APInt(64, 1), /*isUnsigned=*/false); + bool RHSIsSubsetOfLHS = isStrictSubset(RHS, LHS); + if (RHSIsSubsetOfLHS) { + LHSScore = llvm::APSInt::get(0); + } else { + for (const OMPContextSelectorData &Data : LHS) { + if (Data.Score.getBitWidth() > LHSScore.getBitWidth()) { + LHSScore = LHSScore.extend(Data.Score.getBitWidth()) + Data.Score; + } else if (Data.Score.getBitWidth() < LHSScore.getBitWidth()) { + LHSScore += Data.Score.extend(LHSScore.getBitWidth()); + } else { + LHSScore += Data.Score; + } + } + } + llvm::APSInt RHSScore(llvm::APInt(64, 1), /*isUnsigned=*/false); + if (!RHSIsSubsetOfLHS && isStrictSubset(LHS, RHS)) { + RHSScore = llvm::APSInt::get(0); + } else { + for (const OMPContextSelectorData &Data : RHS) { + if (Data.Score.getBitWidth() > RHSScore.getBitWidth()) { + RHSScore = RHSScore.extend(Data.Score.getBitWidth()) + Data.Score; + } else if (Data.Score.getBitWidth() < RHSScore.getBitWidth()) { + RHSScore += Data.Score.extend(RHSScore.getBitWidth()); + } else { + RHSScore += Data.Score; + } + } + } + return llvm::APSInt::compareValues(LHSScore, RHSScore) >= 0 ? LHSAttr + : RHSAttr; +} + +static bool isOpenMPContextMatch(Sema &S, const OMPDeclareVariantAttr *A, + FunctionDecl *FD) { + const CompleteOMPContextSelectorData Data = + translateAttrToContextSelectorData(S, A, FD); + ASTContext &C = S.getASTContext(); + return matchesOpenMPContextImpl(Data, C.getLangOpts(), C.getTargetInfo()); +} + +bool Sema::isNonMatchingDueToVariantContext(FunctionDecl &FD) { + auto *CtxAttr = FD.getAttr(); + if (!CtxAttr || CtxAttr->getVariantFuncRef()) + return false; + return !isOpenMPContextMatch(*this, CtxAttr, &FD); +} + +bool Sema::ActOnOpenMPDeclareVariantDirective( + FunctionDecl *FD, Expr *VariantRef, SourceRange SR, + ArrayRef Data) { + if (Data.empty()) + return false; + SmallVector CtxScores; + SmallVector CtxSets; + SmallVector Ctxs; + SmallVector ImplVendors, DeviceKinds; + bool IsError = false; + for (const OMPCtxSelectorData &D : Data) { + OpenMPContextSelectorSetKind CtxSet = D.CtxSet; + OpenMPContextSelectorKind Ctx = D.Ctx; + if (CtxSet == OMP_CTX_SET_unknown || Ctx == OMP_CTX_unknown) + return false; + Expr *Score = nullptr; + if (D.Score.isUsable()) { + Score = D.Score.get(); + if (!Score->isTypeDependent() && !Score->isValueDependent() && + !Score->isInstantiationDependent() && + !Score->containsUnexpandedParameterPack()) { + Score = + PerformOpenMPImplicitIntegerConversion(Score->getExprLoc(), Score) + .get(); + if (Score) + Score = VerifyIntegerConstantExpression(Score).get(); + } + } else { + // OpenMP 5.0, 2.3.3 Matching and Scoring Context Selectors. + // The kind, arch, and isa selectors are given the values 2^l, 2^(l+1) and + // 2^(l+2), respectively, where l is the number of traits in the construct + // set. + // TODO: implement correct logic for isa and arch traits. + // TODO: take the construct context set into account when it is + // implemented. + int L = 0; // Currently set the number of traits in construct set to 0, + // since the construct trait set in not supported yet. + if (CtxSet == OMP_CTX_SET_device && Ctx == OMP_CTX_kind) + Score = ActOnIntegerConstant(SourceLocation(), std::pow(2, L)).get(); + else + Score = ActOnIntegerConstant(SourceLocation(), 0).get(); + } + switch (Ctx) { + case OMP_CTX_vendor: + assert(CtxSet == OMP_CTX_SET_implementation && + "Expected implementation context selector set."); + ImplVendors.append(D.Names.begin(), D.Names.end()); + break; + case OMP_CTX_kind: + assert(CtxSet == OMP_CTX_SET_device && + "Expected device context selector set."); + DeviceKinds.append(D.Names.begin(), D.Names.end()); + break; + case OMP_CTX_unknown: + llvm_unreachable("Unknown context selector kind."); + } + IsError = IsError || !Score; + CtxSets.push_back(CtxSet); + Ctxs.push_back(Ctx); + CtxScores.push_back(Score); + } + if (!IsError) { + auto *NewAttr = OMPDeclareVariantAttr::CreateImplicit( + Context, VariantRef, CtxScores.begin(), CtxScores.size(), + CtxSets.begin(), CtxSets.size(), Ctxs.begin(), Ctxs.size(), + ImplVendors.begin(), ImplVendors.size(), DeviceKinds.begin(), + DeviceKinds.size(), SR); + if (FD) { + FD->addAttr(NewAttr); + } else { + assert(!DeclareVariantScopeAttr && + "TODO nested begin/end declare varinat"); + DeclareVariantScopeAttr = NewAttr; + return !isOpenMPContextMatch(*this, DeclareVariantScopeAttr, nullptr); + } + } + return false; +} diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -13,6 +13,7 @@ #include "clang/Sema/Overload.h" #include "clang/AST/ASTContext.h" +#include "clang/AST/Attr.h" #include "clang/AST/CXXInheritance.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/Expr.h" @@ -32,6 +33,7 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" #include @@ -9266,7 +9268,8 @@ return Comparison::Equal; } -static bool isBetterMultiversionCandidate(const OverloadCandidate &Cand1, +static bool isBetterMultiversionCandidate(Sema &S, + const OverloadCandidate &Cand1, const OverloadCandidate &Cand2) { if (!Cand1.Function || !Cand1.Function->isMultiVersion() || !Cand2.Function || !Cand2.Function->isMultiVersion()) @@ -9277,6 +9280,18 @@ if (Cand1.Function->isInvalidDecl()) return false; if (Cand2.Function->isInvalidDecl()) return true; + // If we have an OpenMP declare variant attribute on either candidate we use + // it to order the candidates. The first is only better if it has a attribute + // that is considered better or if it has no attribute and the one on the + // second candidate is not a match. + auto *OMPVariantAttr1 = Cand1.Function->getAttr(); + auto *OMPVariantAttr2 = Cand2.Function->getAttr(); + if (OMPVariantAttr1 || OMPVariantAttr2) { + auto *OMPVariantAttrBest = S.getBetterOpenMPContextMatch( + OMPVariantAttr1, OMPVariantAttr2, Cand1.Function, Cand2.Function); + return OMPVariantAttrBest == OMPVariantAttr1; + } + // If this is a cpu_dispatch/cpu_specific multiversion situation, prefer // cpu_dispatch, else arbitrarily based on the identifiers. bool Cand1CPUDisp = Cand1.Function->hasAttr(); @@ -9545,7 +9560,7 @@ if (HasPS1 != HasPS2 && HasPS1) return true; - return isBetterMultiversionCandidate(Cand1, Cand2); + return isBetterMultiversionCandidate(S, Cand1, Cand2); } /// Determine whether two declarations are "equivalent" for the purposes of @@ -9659,6 +9674,18 @@ } } + // [OpenMP] Similar to the CUDA code above, OpenMP declare variants might not + // be eligible at all so we need to filter them out early. + if (S.getLangOpts().OpenMP) { + // TODO use context information + auto IsNonMatchVariant = [&](OverloadCandidate *Cand) { + if (!Cand->Viable || !Cand->Function) + return false; + return S.isNonMatchingDueToVariantContext(*Cand->Function); + }; + llvm::erase_if(Candidates, IsNonMatchVariant); + } + // Find the best viable function. Best = end(); for (auto *Cand : Candidates) { @@ -9717,15 +9744,18 @@ // Iterate through all DeclareVariant attributes and check context selectors. const OMPDeclareVariantAttr *BestVariant = nullptr; for (const auto *A : FD->specific_attrs()) - BestVariant = - getBetterOpenMPContextMatch(S.getASTContext(), BestVariant, A); + BestVariant = S.getBetterOpenMPContextMatch(BestVariant, A, FD, FD); if (!BestVariant || !BestVariant->getVariantFuncRef()) return OR_Success; - // TODO: Handle template instantiation + if ((Best->ULE = + dyn_cast(BestVariant->getVariantFuncRef()))) + return OR_Success; + Best->Function = cast( cast(BestVariant->getVariantFuncRef()->IgnoreParenImpCasts()) ->getDecl()); + S.MarkFunctionReferenced(Loc, Best->Function); return OR_Success; } @@ -12572,6 +12602,13 @@ OverloadCandidateSet::iterator Best; OverloadingResult OverloadResult = CandidateSet.BestViableFunction(*this, Fn->getBeginLoc(), Best); + if (OverloadResult == OR_Success && Best->ULE) { + assert(OverloadResult == OR_Success && getLangOpts().OpenMP && + "Expected OpenMP variant redirect"); + return BuildOverloadedCallExpr(S, Fn, Best->ULE, LParenLoc, Args, RParenLoc, + ExecConfig, AllowTypoCorrection, + CalleesAddressIsTaken); + } return FinishOverloadedCallExpr(*this, S, Fn, ULE, LParenLoc, Args, RParenLoc, ExecConfig, &CandidateSet, &Best, diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -8596,6 +8596,9 @@ if (LangOpts.CUDA) inheritCUDATargetAttrs(FD, *Specialization->getPrimaryTemplate()); + if (LangOpts.OpenMP) + inheritOpenMPVariantAttrs(FD, *Specialization->getPrimaryTemplate()); + // The "previous declaration" for this function template specialization is // the prior function template specialization. Previous.clear(); diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -5570,7 +5570,7 @@ if (FunctionDecl *Function = dyn_cast(Inst.first)) { bool DefinitionRequired = Function->getTemplateSpecializationKind() == TSK_ExplicitInstantiationDefinition; - if (Function->isMultiVersion()) { + if (Function->isMultiVersion() && !Function->isOpenMPMultiVersion()) { getASTContext().forEachMultiversionedFunctionVersion( Function, [this, Inst, DefinitionRequired](FunctionDecl *CurFD) { InstantiateFunctionDefinition(/*FIXME:*/ Inst.second, CurFD, true, diff --git a/clang/test/AST/ast-dump-openmp-begin-declare-variant.c b/clang/test/AST/ast-dump-openmp-begin-declare-variant.c new file mode 100644 --- /dev/null +++ b/clang/test/AST/ast-dump-openmp-begin-declare-variant.c @@ -0,0 +1,83 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck %s + +int also_before(void) { + return 0; +} + +#pragma omp begin declare variant match(device={kind(cpu)}) +int also_after(void) { + return 1; +} +int also_before(void) { + return 1; +} +#pragma omp end declare variant + +#pragma omp begin declare variant match(device={kind(gpu)}) +int also_after(void) { + return 2; +} +int also_before(void) { + return 2; +} +#pragma omp end declare variant + + +#pragma omp begin declare variant match(device={kind(fpga)}) + +This text is never parsed! + +#pragma omp end declare variant + +int also_after(void) { + return 0; +} + +int test() { + return also_after() + also_before(); +} + +// Make sure: +// 1) we pick the right versions, that is test should reference the kind(cpu) versions. +// 2) we do not see the ast nodes for the gpu kind +// 3) we do not choke on the text in the kind(fpga) guarded scope. + +// CHECK: -FunctionDecl {{.*}} <{{.*}}3:1, line:{{.*}}:1> line:{{.*}}:5 also_before 'int (void)' +// CHECK-NEXT: | |-CompoundStmt {{.*}} +// CHECK-NEXT: | | `-ReturnStmt {{.*}} +// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | `-OMPDeclareVariantAttr {{.*}} Inherited Implicit 1 1 cpu +// CHECK-NEXT: | |-<<>> +// CHECK-NEXT: | `-IntegerLiteral {{.*}} <> 'int' 1 +// CHECK-NEXT: |-FunctionDecl [[GOOD_ALSO_AFTER:0x[a-z0-9]*]] line:{{.*}}:5 used also_after 'int (void)' +// CHECK-NEXT: | |-CompoundStmt {{.*}} +// CHECK-NEXT: | | `-ReturnStmt {{.*}} +// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 1 +// CHECK-NEXT: | `-OMPDeclareVariantAttr {{.*}} Implicit 1 1 cpu +// CHECK-NEXT: | |-<<>> +// CHECK-NEXT: | `-IntegerLiteral {{.*}} <> 'int' 1 +// CHECK-NEXT: |-FunctionDecl [[GOOD_ALSO_BEFORE:0x[a-z0-9]*]] line:{{.*}}:5 used also_before 'int (void)' +// CHECK-NEXT: | |-CompoundStmt {{.*}} +// CHECK-NEXT: | | `-ReturnStmt {{.*}} +// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 1 +// CHECK-NEXT: | `-OMPDeclareVariantAttr {{.*}} Implicit 1 1 cpu +// CHECK-NEXT: | |-<<>> +// CHECK-NEXT: | `-IntegerLiteral {{.*}} <> 'int' 1 +// CHECK-NEXT: |-FunctionDecl {{.*}} line:{{.*}}:5 also_after 'int (void)' +// CHECK-NEXT: | |-CompoundStmt {{.*}} +// CHECK-NEXT: | | `-ReturnStmt {{.*}} +// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | `-OMPDeclareVariantAttr {{.*}} Inherited Implicit 1 1 cpu +// CHECK-NEXT: | |-<<>> +// CHECK-NEXT: | `-IntegerLiteral {{.*}} <> 'int' 1 +// CHECK-NEXT: `-FunctionDecl {{.*}} line:{{.*}}:5 test 'int ()' +// CHECK-NEXT: `-CompoundStmt {{.*}} +// CHECK-NEXT: `-ReturnStmt {{.*}} +// CHECK-NEXT: `-BinaryOperator {{.*}} 'int' '+' +// CHECK-NEXT: |-CallExpr {{.*}} 'int' +// CHECK-NEXT: | `-ImplicitCastExpr {{.*}} 'int (*)(void)' +// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int (void)' lvalue Function [[GOOD_ALSO_AFTER]] 'also_after' 'int (void)' +// CHECK-NEXT: `-CallExpr {{.*}} 'int' +// CHECK-NEXT: `-ImplicitCastExpr {{.*}} 'int (*)(void)' +// CHECK-NEXT: `-DeclRefExpr {{.*}} 'int (void)' lvalue Function [[GOOD_ALSO_BEFORE]] 'also_before' 'int (void)' + diff --git a/clang/test/OpenMP/begin_declare_variant_codegen.cpp b/clang/test/OpenMP/begin_declare_variant_codegen.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/begin_declare_variant_codegen.cpp @@ -0,0 +1,134 @@ +// RUN: %clang_cc1 -verify -fopenmp -x c++ -emit-llvm %s -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -o - | FileCheck %s +// expected-no-diagnostics + +int bar(void) { + return 0; +} + +template +T baz(void) { return 0; } + +#pragma omp begin declare variant match(device={kind(cpu)}) +int foo(void) { + return 1; +} +int bar(void) { + return 1; +} +template +T baz(void) { return 1; } + +template +T biz(void) { return 1; } + +template +T buz(void) { return 3; } + +template <> +char buz(void) { return 1; } + +template +T bez(void) { return 3; } +#pragma omp end declare variant + +#pragma omp begin declare variant match(device={kind(gpu)}) +int foo(void) { + return 2; +} +int bar(void) { + return 2; +} +#pragma omp end declare variant + + +#pragma omp begin declare variant match(device={kind(fpga)}) + +This text is never parsed! + +#pragma omp end declare variant + +int foo(void) { + return 0; +} + +template +T biz(void) { return 0; } + +template <> +char buz(void) { return 0; } + +template <> +long bez(void) { return 0; } + +#pragma omp begin declare variant match(device = {kind(cpu)}) +template <> +long bez(void) { return 1; } +#pragma omp end declare variant + +int test() { + return foo() + bar() + baz() + biz() + buz() + bez(); +} + +// Make sure all ompvariant functions return 1 and all others return 0. + +// CHECK: ; Function Attrs: +// CHECK-NEXT: define i32 @_Z3barv() +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 0 +// CHECK-NEXT: } +// CHECK: ; Function Attrs: +// CHECK-NEXT: define i32 @_Z3foov.ompvariant() +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// CHECK-NEXT: } +// CHECK: ; Function Attrs: +// CHECK-NEXT: define i32 @_Z3barv.ompvariant() +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// CHECK-NEXT: } +// CHECK: ; Function Attrs: +// CHECK-NEXT: define signext i8 @_Z3buzIcET_v.ompvariant() +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i8 1 +// CHECK-NEXT: } +// CHECK: ; Function Attrs: +// CHECK-NEXT: define i32 @_Z3foov() +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 0 +// CHECK-NEXT: } +// CHECK: ; Function Attrs: +// CHECK-NEXT: define signext i8 @_Z3buzIcET_v() +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i8 0 +// CHECK-NEXT: } +// CHECK: ; Function Attrs: +// CHECK-NEXT: define i64 @_Z3bezIlET_v() +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i64 0 +// CHECK-NEXT: } +// CHECK: ; Function Attrs: +// CHECK-NEXT: define i64 @_Z3bezIlET_v.ompvariant() +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i64 1 +// CHECK-NEXT: } + +// Make sure we call only ompvariant functions + +// CHECK: define i32 @_Z4testv() +// CHECK: %call = call i32 @_Z3foov.ompvariant() +// CHECK: %call1 = call i32 @_Z3barv.ompvariant() +// CHECK: %call2 = call i32 @_Z3bazIiET_v.ompvariant() +// CHECK: %call4 = call signext i16 @_Z3bizIsET_v.ompvariant() +// CHECK: %call6 = call signext i8 @_Z3buzIcET_v.ompvariant() +// CHECK: %call10 = call i64 @_Z3bezIlET_v.ompvariant() + +// CHECK: ; Function Attrs: +// CHECK-NEXT: define linkonce_odr i32 @_Z3bazIiET_v.ompvariant() +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// CHECK-NEXT: } +// CHECK: ; Function Attrs: +// CHECK-NEXT: define linkonce_odr signext i16 @_Z3bizIsET_v.ompvariant() +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i16 1 +// CHECK-NEXT: } diff --git a/clang/test/OpenMP/declare_variant_ast_print.cpp b/clang/test/OpenMP/declare_variant_ast_print.cpp --- a/clang/test/OpenMP/declare_variant_ast_print.cpp +++ b/clang/test/OpenMP/declare_variant_ast_print.cpp @@ -40,7 +40,6 @@ #pragma omp declare variant(foofoo ) match(user = {condition()}) #pragma omp declare variant(foofoo ) match(implementation={vendor(llvm)},device={kind(cpu)}) #pragma omp declare variant(foofoo ) match(implementation={vendor(unknown)}) -// TODO: Handle template instantiation #pragma omp declare variant(foofoo ) match(implementation={vendor(score(C+5): ibm, xxx, ibm)},device={kind(cpu,host)}) template T barbar(); diff --git a/clang/test/OpenMP/math_codegen.cpp b/clang/test/OpenMP/math_codegen.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/math_codegen.cpp @@ -0,0 +1,15 @@ +#include + +void math(short s, int i, float f, double d) { + sin(s); + sin(i); + sin(f); + sin(d); +} + +void foo(short s, int i, float f, double d, long double ld) { + //sin(ld); + math(s, i, f, d); +#pragma omp target + { math(s, i, f, d); } +} diff --git a/clang/test/OpenMP/math_fp_macro.cpp b/clang/test/OpenMP/math_fp_macro.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/math_fp_macro.cpp @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -x c++ -emit-llvm %s -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -o - | FileCheck %s +// expected-no-diagnostics + +#include + +int main() { + double a(0); + return (std::fpclassify(a) != FP_ZERO); +} diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -91,6 +91,8 @@ __OMP_DIRECTIVE_EXT(master_taskloop_simd, "master taskloop simd") __OMP_DIRECTIVE_EXT(parallel_master_taskloop_simd, "parallel master taskloop simd") +__OMP_DIRECTIVE_EXT(begin_declare_variant, "begin declare variant") +__OMP_DIRECTIVE_EXT(end_declare_variant, "end declare variant") // Has to be the last because Clang implicitly expects it to be. __OMP_DIRECTIVE(unknown)