Index: clang/include/clang/AST/OpenMPClause.h =================================================================== --- clang/include/clang/AST/OpenMPClause.h +++ clang/include/clang/AST/OpenMPClause.h @@ -26,6 +26,7 @@ #include "clang/Basic/LLVM.h" #include "clang/Basic/OpenMPKinds.h" #include "clang/Basic/SourceLocation.h" +#include "clang/Sema/ParsedAttr.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/PointerIntPair.h" @@ -9172,6 +9173,53 @@ } }; +/// This represents 'ompx_attribute' clause in a directive that might generate +/// an outlined function. An example is given below. +/// +/// \code +/// #pragma omp target [...] ompx_attribute(__attribute__((launch_bounds(64)))) +/// \endcode +class OMPXAttributeClause + : public OMPNoChildClause { + friend class OMPClauseReader; + + /// Location of '('. + SourceLocation LParenLoc; + + /// The parsed attributes (clause arguments) + SmallVector Attrs; + +public: + /// Build 'ompx_attribute' clause. + /// + /// \param Attrs The parsed attributes (clause arguments) + /// \param StartLoc Starting location of the clause. + /// \param LParenLoc Location of '('. + /// \param EndLoc Ending location of the clause. + OMPXAttributeClause(ArrayRef Attrs, SourceLocation StartLoc, + SourceLocation LParenLoc, SourceLocation EndLoc) + : OMPNoChildClause(StartLoc, EndLoc), LParenLoc(LParenLoc), Attrs(Attrs) { + } + + /// Build an empty clause. + OMPXAttributeClause() : OMPNoChildClause() {} + + /// Sets the location of '('. + void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; } + + /// Returns the location of '('. + SourceLocation getLParenLoc() const { return LParenLoc; } + + /// Returns the attributes parsed from this clause. + ArrayRef getAttrs() const { return Attrs; } + + /// Replace the attributes with \p NewAttrs. 
+ void setAttrs(ArrayRef NewAttrs) { + Attrs.clear(); + Attrs.append(NewAttrs.begin(), NewAttrs.end()); + } +}; + } // namespace clang #endif // LLVM_CLANG_AST_OPENMPCLAUSE_H Index: clang/include/clang/AST/RecursiveASTVisitor.h =================================================================== --- clang/include/clang/AST/RecursiveASTVisitor.h +++ clang/include/clang/AST/RecursiveASTVisitor.h @@ -3871,6 +3871,12 @@ return true; } +template +bool RecursiveASTVisitor::VisitOMPXAttributeClause( + OMPXAttributeClause *C) { + return true; +} + // FIXME: look at the following tricky-seeming exprs to see if we // need to recurse on anything. These are ones that have methods // returning decls or qualtypes or nestednamespecifier -- though I'm Index: clang/include/clang/Basic/DiagnosticGroups.td =================================================================== --- clang/include/clang/Basic/DiagnosticGroups.td +++ clang/include/clang/Basic/DiagnosticGroups.td @@ -1278,9 +1278,10 @@ def OpenMPTarget : DiagGroup<"openmp-target", [OpenMPMapping]>; def OpenMPPre51Compat : DiagGroup<"pre-openmp-51-compat">; def OpenMP51Ext : DiagGroup<"openmp-51-extensions">; +def OpenMPExtensions : DiagGroup<"openmp-extensions">; def OpenMP : DiagGroup<"openmp", [ SourceUsesOpenMP, OpenMPClauses, OpenMPLoopForm, OpenMPTarget, - OpenMPMapping, OpenMP51Ext + OpenMPMapping, OpenMP51Ext, OpenMPExtensions ]>; // Backend warnings. 
Index: clang/include/clang/Basic/DiagnosticParseKinds.td =================================================================== --- clang/include/clang/Basic/DiagnosticParseKinds.td +++ clang/include/clang/Basic/DiagnosticParseKinds.td @@ -1540,6 +1540,9 @@ InGroup; def warn_omp_depend_in_ordered_deprecated : Warning<"'depend' clause for" " 'ordered' is deprecated; use 'doacross' instead">, InGroup; +def warn_omp_invalid_attribute_for_ompx_attributes : Warning<"'ompx_attribute' clause only allows " + "'amdgpu_flat_work_group_size', 'amdgpu_waves_per_eu', and 'launch_bounds'; " + "%0 is ignored">, InGroup; // Pragma loop support. def err_pragma_loop_missing_argument : Error< Index: clang/include/clang/Parse/Parser.h =================================================================== --- clang/include/clang/Parse/Parser.h +++ clang/include/clang/Parse/Parser.h @@ -3490,6 +3490,13 @@ // OMPClause *ParseOpenMPInteropClause(OpenMPClauseKind Kind, bool ParseOnly); + /// Parses an 'ompx_attribute' clause. + /// + /// \param ParseOnly true to skip the clause's semantic actions and return + /// nullptr. + // + OMPClause *ParseOpenMPOMPXAttributesClause(bool ParseOnly); + public: /// Parses simple expression in parens for single-expression clauses of OpenMP /// constructs. Index: clang/include/clang/Sema/Sema.h =================================================================== --- clang/include/clang/Sema/Sema.h +++ clang/include/clang/Sema/Sema.h @@ -10983,6 +10983,11 @@ bool ConstantFoldAttrArgs(const AttributeCommonInfo &CI, MutableArrayRef Args); + /// Create a CUDALaunchBoundsAttr attribute. + CUDALaunchBoundsAttr *CreateLaunchBoundsAttr(const AttributeCommonInfo &CI, + Expr *MaxThreads, + Expr *MinBlocks); + /// AddLaunchBoundsAttr - Adds a launch_bounds attribute to a particular /// declaration. 
void AddLaunchBoundsAttr(Decl *D, const AttributeCommonInfo &CI, @@ -10999,11 +11004,21 @@ void AddXConsumedAttr(Decl *D, const AttributeCommonInfo &CI, RetainOwnershipKind K, bool IsTemplateInstantiation); + /// Create an AMDGPUFlatWorkGroupSizeAttr attribute. + AMDGPUFlatWorkGroupSizeAttr * + CreateAMDGPUFlatWorkGroupSizeAttr(const AttributeCommonInfo &CI, Expr *Min, + Expr *Max); + /// addAMDGPUFlatWorkGroupSizeAttr - Adds an amdgpu_flat_work_group_size /// attribute to a particular declaration. void addAMDGPUFlatWorkGroupSizeAttr(Decl *D, const AttributeCommonInfo &CI, Expr *Min, Expr *Max); + /// Create an AMDGPUWavesPerEUAttr attribute. + AMDGPUWavesPerEUAttr * + CreateAMDGPUWavesPerEUAttr(const AttributeCommonInfo &CI, Expr *Min, + Expr *Max); + /// addAMDGPUWavePersEUAttr - Adds an amdgpu_waves_per_eu attribute to a /// particular declaration. void addAMDGPUWavesPerEUAttr(Decl *D, const AttributeCommonInfo &CI, @@ -12336,6 +12351,12 @@ ArrayRef VarList, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc); + /// Called on a well-formed 'ompx_attribute' clause. + OMPClause *ActOnOpenMPXAttributeClause(ArrayRef Attrs, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); + /// The kind of conversion being performed. enum CheckedConversionKind { /// An implicit conversion. Index: clang/lib/AST/ExprConstant.cpp =================================================================== --- clang/lib/AST/ExprConstant.cpp +++ clang/lib/AST/ExprConstant.cpp @@ -4939,7 +4939,7 @@ namespace { /// A location where the result (returned value) of evaluating a /// statement should be stored. -struct StmtResult { +struct StmtResultTy { /// The APValue that should be filled in with the returned value. APValue &Value; /// The location containing the result, if any (used to support RVO). 
@@ -4960,12 +4960,12 @@ } -static EvalStmtResult EvaluateStmt(StmtResult &Result, EvalInfo &Info, +static EvalStmtResult EvaluateStmt(StmtResultTy &Result, EvalInfo &Info, const Stmt *S, const SwitchCase *SC = nullptr); /// Evaluate the body of a loop, and translate the result as appropriate. -static EvalStmtResult EvaluateLoopBody(StmtResult &Result, EvalInfo &Info, +static EvalStmtResult EvaluateLoopBody(StmtResultTy &Result, EvalInfo &Info, const Stmt *Body, const SwitchCase *Case = nullptr) { BlockScopeRAII Scope(Info); @@ -4989,7 +4989,7 @@ } /// Evaluate a switch statement. -static EvalStmtResult EvaluateSwitch(StmtResult &Result, EvalInfo &Info, +static EvalStmtResult EvaluateSwitch(StmtResultTy &Result, EvalInfo &Info, const SwitchStmt *SS) { BlockScopeRAII Scope(Info); @@ -5082,7 +5082,7 @@ } // Evaluate a statement. -static EvalStmtResult EvaluateStmt(StmtResult &Result, EvalInfo &Info, +static EvalStmtResult EvaluateStmt(StmtResultTy &Result, EvalInfo &Info, const Stmt *S, const SwitchCase *Case) { if (!Info.nextStep(S)) return ESR_Failed; @@ -6230,7 +6230,7 @@ Frame.LambdaThisCaptureField); } - StmtResult Ret = {Result, ResultSlot}; + StmtResultTy Ret = {Result, ResultSlot}; EvalStmtResult ESR = EvaluateStmt(Ret, Info, Body); if (ESR == ESR_Succeeded) { if (Callee->getReturnType()->isVoidType()) @@ -6264,7 +6264,7 @@ // FIXME: Creating an APValue just to hold a nonexistent return value is // wasteful. APValue RetVal; - StmtResult Ret = {RetVal, nullptr}; + StmtResultTy Ret = {RetVal, nullptr}; // If it's a delegating constructor, delegate. if (Definition->isDelegatingConstructor()) { @@ -6582,7 +6582,7 @@ // FIXME: Creating an APValue just to hold a nonexistent return value is // wasteful. 
APValue RetVal; - StmtResult Ret = {RetVal, nullptr}; + StmtResultTy Ret = {RetVal, nullptr}; if (EvaluateStmt(Ret, Info, Definition->getBody()) == ESR_Failed) return false; @@ -8012,7 +8012,7 @@ } APValue ReturnValue; - StmtResult Result = { ReturnValue, nullptr }; + StmtResultTy Result = {ReturnValue, nullptr}; EvalStmtResult ESR = EvaluateStmt(Result, Info, *BI); if (ESR != ESR_Succeeded) { // FIXME: If the statement-expression terminated due to 'return', Index: clang/lib/AST/OpenMPClause.cpp =================================================================== --- clang/lib/AST/OpenMPClause.cpp +++ clang/lib/AST/OpenMPClause.cpp @@ -2534,6 +2534,18 @@ OS << ")"; } +void OMPClausePrinter::VisitOMPXAttributeClause(OMPXAttributeClause *Node) { + OS << "ompx_attribute("; + bool IsFirst = true; + for (auto &Attr : Node->getAttrs()) { + if (!IsFirst) + OS << ", "; + Attr->printPretty(OS, Policy); + IsFirst = false; + } + OS << ")"; +} + void OMPTraitInfo::getAsVariantMatchInfo(ASTContext &ASTCtx, VariantMatchInfo &VMI) const { for (const OMPTraitSet &Set : Sets) { Index: clang/lib/AST/StmtProfile.cpp =================================================================== --- clang/lib/AST/StmtProfile.cpp +++ clang/lib/AST/StmtProfile.cpp @@ -928,6 +928,8 @@ void OMPClauseProfiler::VisitOMPDoacrossClause(const OMPDoacrossClause *C) { VisitOMPClauseList(C); } +void OMPClauseProfiler::VisitOMPXAttributeClause(const OMPXAttributeClause *C) { +} } // namespace void Index: clang/lib/CodeGen/CGOpenMPRuntime.cpp =================================================================== --- clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -6110,8 +6110,23 @@ DefaultValTeams, DefaultValThreads, IsOffloadEntry, OutlinedFn, OutlinedFnID); - if (OutlinedFn != nullptr) - CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM); + if (!OutlinedFn) + return; + + CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM); + + for 
(auto *C : D.getClausesOfKind()) { + for (auto *A : C->getAttrs()) { + if (auto *Attr = dyn_cast(A)) + CGM.HandleCUDALaunchBoundsAttr(OutlinedFn, Attr); + else if (auto *Attr = dyn_cast(A)) + CGM.HandleAMDGPUFlatWorkGroupSizeAttr(OutlinedFn, Attr); + else if (auto *Attr = dyn_cast(A)) + CGM.HandleAMDGPUWavesPerEUAttr(OutlinedFn, Attr); + else + llvm_unreachable("Unexpected attribute kind"); + } + } } /// Checks if the expression is constant or does not have non-trivial function Index: clang/lib/CodeGen/CodeGenModule.h =================================================================== --- clang/lib/CodeGen/CodeGenModule.h +++ clang/lib/CodeGen/CodeGenModule.h @@ -1557,6 +1557,21 @@ /// because we'll lose all important information after each repl. void moveLazyEmissionStates(CodeGenModule *NewBuilder); + /// Emit the IR encoding to attach the CUDA launch bounds attribute to \p F. + void HandleCUDALaunchBoundsAttr(llvm::Function *F, + const CUDALaunchBoundsAttr *A); + + /// Emit the IR encoding to attach the AMD GPU flat-work-group-size attribute + /// to \p F. Alternatively, the work group size can be taken from a \p + /// ReqdWGS. + void HandleAMDGPUFlatWorkGroupSizeAttr( + llvm::Function *F, const AMDGPUFlatWorkGroupSizeAttr *A, + const ReqdWorkGroupSizeAttr *ReqdWGS = nullptr); + + /// Emit the IR encoding to attach the AMD GPU waves-per-eu attribute to \p F. 
+ void HandleAMDGPUWavesPerEUAttr(llvm::Function *F, + const AMDGPUWavesPerEUAttr *A); + private: llvm::Constant *GetOrCreateLLVMFunction( StringRef MangledName, llvm::Type *Ty, GlobalDecl D, bool ForVTable, Index: clang/lib/CodeGen/Targets/AMDGPU.cpp =================================================================== --- clang/lib/CodeGen/Targets/AMDGPU.cpp +++ clang/lib/CodeGen/Targets/AMDGPU.cpp @@ -317,26 +317,7 @@ const auto *FlatWGS = FD->getAttr(); if (ReqdWGS || FlatWGS) { - unsigned Min = 0; - unsigned Max = 0; - if (FlatWGS) { - Min = FlatWGS->getMin() - ->EvaluateKnownConstInt(M.getContext()) - .getExtValue(); - Max = FlatWGS->getMax() - ->EvaluateKnownConstInt(M.getContext()) - .getExtValue(); - } - if (ReqdWGS && Min == 0 && Max == 0) - Min = Max = ReqdWGS->getXDim() * ReqdWGS->getYDim() * ReqdWGS->getZDim(); - - if (Min != 0) { - assert(Min <= Max && "Min must be less than or equal Max"); - - std::string AttrVal = llvm::utostr(Min) + "," + llvm::utostr(Max); - F->addFnAttr("amdgpu-flat-work-group-size", AttrVal); - } else - assert(Max == 0 && "Max must be zero"); + M.HandleAMDGPUFlatWorkGroupSizeAttr(F, FlatWGS, ReqdWGS); } else if (IsOpenCLKernel || IsHIPKernel) { // By default, restrict the maximum size to a value specified by // --gpu-max-threads-per-block=n or its default value for HIP. @@ -349,24 +330,8 @@ F->addFnAttr("amdgpu-flat-work-group-size", AttrVal); } - if (const auto *Attr = FD->getAttr()) { - unsigned Min = - Attr->getMin()->EvaluateKnownConstInt(M.getContext()).getExtValue(); - unsigned Max = Attr->getMax() ? 
Attr->getMax() - ->EvaluateKnownConstInt(M.getContext()) - .getExtValue() - : 0; - - if (Min != 0) { - assert((Max == 0 || Min <= Max) && "Min must be less than or equal Max"); - - std::string AttrVal = llvm::utostr(Min); - if (Max != 0) - AttrVal = AttrVal + "," + llvm::utostr(Max); - F->addFnAttr("amdgpu-waves-per-eu", AttrVal); - } else - assert(Max == 0 && "Max must be zero"); - } + if (const auto *Attr = FD->getAttr()) + M.HandleAMDGPUWavesPerEUAttr(F, Attr); if (const auto *Attr = FD->getAttr()) { unsigned NumSGPR = Attr->getNumSGPR(); @@ -595,6 +560,47 @@ return F; } +void CodeGenModule::HandleAMDGPUFlatWorkGroupSizeAttr( + llvm::Function *F, const AMDGPUFlatWorkGroupSizeAttr *FlatWGS, + const ReqdWorkGroupSizeAttr *ReqdWGS) { + unsigned Min = 0; + unsigned Max = 0; + if (FlatWGS) { + Min = FlatWGS->getMin()->EvaluateKnownConstInt(getContext()).getExtValue(); + Max = FlatWGS->getMax()->EvaluateKnownConstInt(getContext()).getExtValue(); + } + if (ReqdWGS && Min == 0 && Max == 0) + Min = Max = ReqdWGS->getXDim() * ReqdWGS->getYDim() * ReqdWGS->getZDim(); + + if (Min != 0) { + assert(Min <= Max && "Min must be less than or equal Max"); + + std::string AttrVal = llvm::utostr(Min) + "," + llvm::utostr(Max); + F->addFnAttr("amdgpu-flat-work-group-size", AttrVal); + } else + assert(Max == 0 && "Max must be zero"); +} + +void CodeGenModule::HandleAMDGPUWavesPerEUAttr( + llvm::Function *F, const AMDGPUWavesPerEUAttr *Attr) { + unsigned Min = + Attr->getMin()->EvaluateKnownConstInt(getContext()).getExtValue(); + unsigned Max = + Attr->getMax() + ? 
Attr->getMax()->EvaluateKnownConstInt(getContext()).getExtValue() + : 0; + + if (Min != 0) { + assert((Max == 0 || Min <= Max) && "Min must be less than or equal Max"); + + std::string AttrVal = llvm::utostr(Min); + if (Max != 0) + AttrVal = AttrVal + "," + llvm::utostr(Max); + F->addFnAttr("amdgpu-waves-per-eu", AttrVal); + } else + assert(Max == 0 && "Max must be zero"); +} + std::unique_ptr CodeGen::createAMDGPUTargetCodeGenInfo(CodeGenModule &CGM) { return std::make_unique(CGM.getTypes()); Index: clang/lib/CodeGen/Targets/NVPTX.cpp =================================================================== --- clang/lib/CodeGen/Targets/NVPTX.cpp +++ clang/lib/CodeGen/Targets/NVPTX.cpp @@ -71,12 +71,12 @@ return true; } -private: // Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the // resulting MDNode to the nvvm.annotations MDNode. static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name, int Operand); +private: static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst, LValue Src) { llvm::Value *Handle = nullptr; @@ -256,24 +256,8 @@ // Create !{, metadata !"kernel", i32 1} node addNVVMMetadata(F, "kernel", 1); } - if (CUDALaunchBoundsAttr *Attr = FD->getAttr()) { - // Create !{, metadata !"maxntidx", i32 } node - llvm::APSInt MaxThreads(32); - MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(M.getContext()); - if (MaxThreads > 0) - addNVVMMetadata(F, "maxntidx", MaxThreads.getExtValue()); - - // min blocks is an optional argument for CUDALaunchBoundsAttr. If it was - // not specified in __launch_bounds__ or if the user specified a 0 value, - // we don't have to add a PTX directive. 
- if (Attr->getMinBlocks()) { - llvm::APSInt MinBlocks(32); - MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(M.getContext()); - if (MinBlocks > 0) - // Create !{, metadata !"minctasm", i32 } node - addNVVMMetadata(F, "minctasm", MinBlocks.getExtValue()); - } - } + if (CUDALaunchBoundsAttr *Attr = FD->getAttr()) + M.HandleCUDALaunchBoundsAttr(F, Attr); } // Attach kernel metadata directly if compiling for NVPTX. @@ -303,6 +287,28 @@ } } +void CodeGenModule::HandleCUDALaunchBoundsAttr( + llvm::Function *F, const CUDALaunchBoundsAttr *Attr) { + // Create !{, metadata !"maxntidx", i32 } node + llvm::APSInt MaxThreads(32); + MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(getContext()); + if (MaxThreads > 0) + NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxntidx", + MaxThreads.getExtValue()); + + // min blocks is an optional argument for CUDALaunchBoundsAttr. If it was + // not specified in __launch_bounds__ or if the user specified a 0 value, + // we don't have to add a PTX directive. 
+ if (Attr->getMinBlocks()) { + llvm::APSInt MinBlocks(32); + MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(getContext()); + if (MinBlocks > 0) + // Create !{, metadata !"minctasm", i32 } node + NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "minctasm", + MinBlocks.getExtValue()); + } +} + std::unique_ptr CodeGen::createNVPTXTargetCodeGenInfo(CodeGenModule &CGM) { return std::make_unique(CGM.getTypes()); Index: clang/lib/Parse/ParseOpenMP.cpp =================================================================== --- clang/lib/Parse/ParseOpenMP.cpp +++ clang/lib/Parse/ParseOpenMP.cpp @@ -3411,6 +3411,9 @@ << getOpenMPClauseName(CKind) << getOpenMPDirectiveName(DKind); SkipUntil(tok::comma, tok::annot_pragma_openmp_end, StopBeforeMatch); break; + case OMPC_ompx_attribute: + Clause = ParseOpenMPOMPXAttributesClause(WrongDirective); + break; default: break; } @@ -3691,6 +3694,62 @@ llvm_unreachable("Unexpected interop variable clause."); } +OMPClause *Parser::ParseOpenMPOMPXAttributesClause(bool ParseOnly) { + SourceLocation Loc = ConsumeToken(); + // Parse '('. + BalancedDelimiterTracker T(*this, tok::l_paren, tok::annot_pragma_openmp_end); + if (T.expectAndConsume(diag::err_expected_lparen_after, + getOpenMPClauseName(OMPC_ompx_attribute).data())) + return nullptr; + + ParsedAttributes ParsedAttrs(AttrFactory); + ParseAttributes(PAKM_GNU | PAKM_CXX11, ParsedAttrs); + + // Parse ')'. 
+ if (T.consumeClose()) + return nullptr; + + if (ParseOnly) + return nullptr; + + SmallVector Attrs; + for (const ParsedAttr &PA : ParsedAttrs) { + switch (PA.getKind()) { + case ParsedAttr::AT_AMDGPUFlatWorkGroupSize: + if (!PA.checkExactlyNumArgs(Actions, 2)) + continue; + if (auto *A = Actions.CreateAMDGPUFlatWorkGroupSizeAttr( + PA, PA.getArgAsExpr(0), PA.getArgAsExpr(1))) + Attrs.push_back(A); + continue; + case ParsedAttr::AT_AMDGPUWavesPerEU: + if (!PA.checkAtLeastNumArgs(Actions, 1) || + !PA.checkAtMostNumArgs(Actions, 2)) + continue; + if (auto *A = Actions.CreateAMDGPUWavesPerEUAttr( + PA, PA.getArgAsExpr(0), + PA.getNumArgs() > 1 ? PA.getArgAsExpr(1) : nullptr)) + Attrs.push_back(A); + continue; + case ParsedAttr::AT_CUDALaunchBounds: + if (!PA.checkAtLeastNumArgs(Actions, 1) || + !PA.checkAtMostNumArgs(Actions, 2)) + continue; + if (auto *A = Actions.CreateLaunchBoundsAttr( + PA, PA.getArgAsExpr(0), + PA.getNumArgs() > 1 ? PA.getArgAsExpr(1) : nullptr)) + Attrs.push_back(A); + continue; + default: + Diag(Loc, diag::warn_omp_invalid_attribute_for_ompx_attributes) << PA; + continue; + }; + } + + return Actions.ActOnOpenMPXAttributeClause(Attrs, Loc, T.getOpenLocation(), + T.getCloseLocation()); +} + /// Parsing of simple OpenMP clauses like 'default' or 'proc_bind'. 
/// /// default-clause: Index: clang/lib/Sema/SemaDeclAttr.cpp =================================================================== --- clang/lib/Sema/SemaDeclAttr.cpp +++ clang/lib/Sema/SemaDeclAttr.cpp @@ -46,6 +46,7 @@ #include "llvm/Support/Error.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include #include using namespace clang; @@ -5633,21 +5634,28 @@ return ValArg.getAs(); } -void Sema::AddLaunchBoundsAttr(Decl *D, const AttributeCommonInfo &CI, - Expr *MaxThreads, Expr *MinBlocks) { +CUDALaunchBoundsAttr * +Sema::CreateLaunchBoundsAttr(const AttributeCommonInfo &CI, Expr *MaxThreads, + Expr *MinBlocks) { CUDALaunchBoundsAttr TmpAttr(Context, CI, MaxThreads, MinBlocks); MaxThreads = makeLaunchBoundsArgExpr(*this, MaxThreads, TmpAttr, 0); if (MaxThreads == nullptr) - return; + return nullptr; if (MinBlocks) { MinBlocks = makeLaunchBoundsArgExpr(*this, MinBlocks, TmpAttr, 1); if (MinBlocks == nullptr) - return; + return nullptr; } - D->addAttr(::new (Context) - CUDALaunchBoundsAttr(Context, CI, MaxThreads, MinBlocks)); + return ::new (Context) + CUDALaunchBoundsAttr(Context, CI, MaxThreads, MinBlocks); +} + +void Sema::AddLaunchBoundsAttr(Decl *D, const AttributeCommonInfo &CI, + Expr *MaxThreads, Expr *MinBlocks) { + if (auto *Attr = CreateLaunchBoundsAttr(CI, MaxThreads, MinBlocks)) + D->addAttr(Attr); } static void handleLaunchBoundsAttr(Sema &S, Decl *D, const ParsedAttr &AL) { @@ -7862,16 +7870,22 @@ return false; } -void Sema::addAMDGPUFlatWorkGroupSizeAttr(Decl *D, - const AttributeCommonInfo &CI, - Expr *MinExpr, Expr *MaxExpr) { +AMDGPUFlatWorkGroupSizeAttr * +Sema::CreateAMDGPUFlatWorkGroupSizeAttr(const AttributeCommonInfo &CI, + Expr *MinExpr, Expr *MaxExpr) { AMDGPUFlatWorkGroupSizeAttr TmpAttr(Context, CI, MinExpr, MaxExpr); if (checkAMDGPUFlatWorkGroupSizeArguments(*this, MinExpr, MaxExpr, TmpAttr)) - return; + return nullptr; + return ::new (Context) + AMDGPUFlatWorkGroupSizeAttr(Context, CI, MinExpr, MaxExpr); 
+} - D->addAttr(::new (Context) - AMDGPUFlatWorkGroupSizeAttr(Context, CI, MinExpr, MaxExpr)); +void Sema::addAMDGPUFlatWorkGroupSizeAttr(Decl *D, + const AttributeCommonInfo &CI, + Expr *MinExpr, Expr *MaxExpr) { + if (auto *Attr = CreateAMDGPUFlatWorkGroupSizeAttr(CI, MinExpr, MaxExpr)) + D->addAttr(Attr); } static void handleAMDGPUFlatWorkGroupSizeAttr(Sema &S, Decl *D, @@ -7916,15 +7930,21 @@ return false; } -void Sema::addAMDGPUWavesPerEUAttr(Decl *D, const AttributeCommonInfo &CI, - Expr *MinExpr, Expr *MaxExpr) { +AMDGPUWavesPerEUAttr * +Sema::CreateAMDGPUWavesPerEUAttr(const AttributeCommonInfo &CI, Expr *MinExpr, + Expr *MaxExpr) { AMDGPUWavesPerEUAttr TmpAttr(Context, CI, MinExpr, MaxExpr); if (checkAMDGPUWavesPerEUArguments(*this, MinExpr, MaxExpr, TmpAttr)) - return; + return nullptr; + + return ::new (Context) AMDGPUWavesPerEUAttr(Context, CI, MinExpr, MaxExpr); +} - D->addAttr(::new (Context) - AMDGPUWavesPerEUAttr(Context, CI, MinExpr, MaxExpr)); +void Sema::addAMDGPUWavesPerEUAttr(Decl *D, const AttributeCommonInfo &CI, + Expr *MinExpr, Expr *MaxExpr) { + if (auto *Attr = CreateAMDGPUWavesPerEUAttr(CI, MinExpr, MaxExpr)) + D->addAttr(Attr); } static void handleAMDGPUWavesPerEUAttr(Sema &S, Decl *D, const ParsedAttr &AL) { Index: clang/lib/Sema/SemaOpenMP.cpp =================================================================== --- clang/lib/Sema/SemaOpenMP.cpp +++ clang/lib/Sema/SemaOpenMP.cpp @@ -30,6 +30,7 @@ #include "clang/Sema/EnterExpressionEvaluationContext.h" #include "clang/Sema/Initialization.h" #include "clang/Sema/Lookup.h" +#include "clang/Sema/ParsedAttr.h" #include "clang/Sema/Scope.h" #include "clang/Sema/ScopeInfo.h" #include "clang/Sema/SemaInternal.h" @@ -24093,3 +24094,10 @@ DSAStack->addDoacrossDependClause(C, OpsOffs); return C; } + +OMPClause *Sema::ActOnOpenMPXAttributeClause(ArrayRef Attrs, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { + return new (Context) OMPXAttributeClause(Attrs, 
StartLoc, LParenLoc, EndLoc); +} Index: clang/lib/Sema/TreeTransform.h =================================================================== --- clang/lib/Sema/TreeTransform.h +++ clang/lib/Sema/TreeTransform.h @@ -2377,6 +2377,18 @@ EndLoc); } + /// Build a new OpenMP 'ompx_attribute' clause. + /// + /// By default, performs semantic analysis to build the new OpenMP clause. + /// Subclasses may override this routine to provide different behavior. + OMPClause *RebuildOMPXAttributeClause(ArrayRef Attrs, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { + return getSema().ActOnOpenMPXAttributeClause(Attrs, StartLoc, LParenLoc, + EndLoc); + } + /// Build a new OpenMP 'align' clause. /// /// By default, performs semantic analysis to build the new OpenMP clause. @@ -10756,6 +10768,16 @@ C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc()); } +template +OMPClause * +TreeTransform::TransformOMPXAttributeClause(OMPXAttributeClause *C) { + SmallVector NewAttrs; + for (auto *A : C->getAttrs()) + NewAttrs.push_back(getDerived().TransformAttr(A)); + return getDerived().RebuildOMPXAttributeClause( + NewAttrs, C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc()); +} + //===----------------------------------------------------------------------===// // Expression transformation //===----------------------------------------------------------------------===// Index: clang/lib/Serialization/ASTReader.cpp =================================================================== --- clang/lib/Serialization/ASTReader.cpp +++ clang/lib/Serialization/ASTReader.cpp @@ -10370,6 +10370,9 @@ C = OMPDoacrossClause::CreateEmpty(Context, NumVars, NumLoops); break; } + case llvm::omp::OMPC_ompx_attribute: + C = new (Context) OMPXAttributeClause(); + break; #define OMP_CLAUSE_NO_CLASS(Enum, Str) \ case llvm::omp::Enum: \ break; @@ -11462,6 +11465,15 @@ C->setLoopData(I, Record.readSubExpr()); } +void OMPClauseReader::VisitOMPXAttributeClause(OMPXAttributeClause *C) { + 
AttrVec Attrs; + Record.readAttributes(Attrs); + C->setAttrs(Attrs); + C->setLocStart(Record.readSourceLocation()); + C->setLParenLoc(Record.readSourceLocation()); + C->setLocEnd(Record.readSourceLocation()); +} + OMPTraitInfo *ASTRecordReader::readOMPTraitInfo() { OMPTraitInfo &TI = getContext().getNewOMPTraitInfo(); TI.Sets.resize(readUInt32()); Index: clang/lib/Serialization/ASTWriter.cpp =================================================================== --- clang/lib/Serialization/ASTWriter.cpp +++ clang/lib/Serialization/ASTWriter.cpp @@ -7171,6 +7171,13 @@ Record.AddStmt(C->getLoopData(I)); } +void OMPClauseWriter::VisitOMPXAttributeClause(OMPXAttributeClause *C) { + Record.AddAttributes(C->getAttrs()); + Record.AddSourceLocation(C->getBeginLoc()); + Record.AddSourceLocation(C->getLParenLoc()); + Record.AddSourceLocation(C->getEndLoc()); +} + void ASTRecordWriter::writeOMPTraitInfo(const OMPTraitInfo *TI) { writeUInt32(TI->Sets.size()); for (const auto &Set : TI->Sets) { Index: clang/test/OpenMP/ompx_attributes_codegen.cpp =================================================================== --- /dev/null +++ clang/test/OpenMP/ompx_attributes_codegen.cpp @@ -0,0 +1,31 @@ +// REQUIRES: amdgpu-registered-target + +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s +// RUN: %clang_cc1 -target-cpu gfx900 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s +// expected-no-diagnostics + + +// Check that the target attributes are set on the generated kernel +void func() { + // CHECK: amdgpu_kernel void 
@__omp_offloading[[HASH:.*]]_l15() #0 + // CHECK: amdgpu_kernel void @__omp_offloading[[HASH:.*]]_l17() + // CHECK: amdgpu_kernel void @__omp_offloading[[HASH:.*]]_l19() #4 + + #pragma omp target ompx_attribute([[clang::amdgpu_flat_work_group_size(10, 20)]]) + {} + #pragma omp target teams ompx_attribute(__attribute__((launch_bounds(45, 90)))) + {} + #pragma omp target teams distribute parallel for simd ompx_attribute([[clang::amdgpu_flat_work_group_size(3, 17)]]) device(3) ompx_attribute(__attribute__((amdgpu_waves_per_eu(3, 7)))) + for (int i = 0; i < 1000; ++i) + {} +} + +// CHECK: attributes #0 +// CHECK-SAME: "amdgpu-flat-work-group-size"="10,20" +// CHECK: attributes #4 +// CHECK-SAME: "amdgpu-flat-work-group-size"="3,17" +// CHECK-SAME: "amdgpu-waves-per-eu"="3,7" + +// CHECK: !{ptr @__omp_offloading[[HASH]]_l17, !"maxntidx", i32 45} +// CHECK: !{ptr @__omp_offloading[[HASH]]_l17, !"minctasm", i32 90} Index: clang/test/OpenMP/ompx_attributes_messages.cpp =================================================================== --- /dev/null +++ clang/test/OpenMP/ompx_attributes_messages.cpp @@ -0,0 +1,43 @@ +// RUN: %clang_cc1 -verify=expected -fopenmp -ferror-limit 100 -o - -std=c++11 %s -Wuninitialized + +void bad() { + #pragma omp target data ompx_attribute() // expected-error {{unexpected OpenMP clause 'ompx_attribute' in directive '#pragma omp target data'}} + #pragma omp target data ompx_attribute(__attribute__((launch_bounds(1, 2)))) // expected-error {{unexpected OpenMP clause 'ompx_attribute' in directive '#pragma omp target data'}} expected-error {{expected at least one 'map', 'use_device_ptr', or 'use_device_addr' clause for '#pragma omp target data'}} + + #pragma omp target ompx_attribute() + {} + #pragma omp target ompx_attribute(__attribute__(())) + {} + #pragma omp target ompx_attribute(__attribute__((pure))) // expected-warning {{'ompx_attribute' clause only allows 'amdgpu_flat_work_group_size', 'amdgpu_waves_per_eu', and 'launch_bounds'; 'pure' is 
ignored}} + {} + #pragma omp target ompx_attribute(__attribute__((pure,amdgpu_waves_per_eu(1, 2), const))) // expected-warning {{'ompx_attribute' clause only allows 'amdgpu_flat_work_group_size', 'amdgpu_waves_per_eu', and 'launch_bounds'; 'pure' is ignored}} expected-warning {{'ompx_attribute' clause only allows 'amdgpu_flat_work_group_size', 'amdgpu_waves_per_eu', and 'launch_bounds'; 'const' is ignored}} + {} + #pragma omp target ompx_attribute(__attribute__((amdgpu_waves_per_eu()))) // expected-error {{'amdgpu_waves_per_eu' attribute takes at least 1 argument}} + {} + #pragma omp target ompx_attribute(__attribute__((amdgpu_waves_per_eu(1, 2, 3)))) // expected-error {{'amdgpu_waves_per_eu' attribute takes no more than 2 arguments}} + {} + #pragma omp target ompx_attribute(__attribute__((amdgpu_flat_work_group_size(1)))) // expected-error {{'amdgpu_flat_work_group_size' attribute requires exactly 2 arguments}} + {} + #pragma omp target ompx_attribute(__attribute__((amdgpu_flat_work_group_size(1, 2, 3,)))) // expected-error {{expected expression}} + {} + #pragma omp target ompx_attribute([[clang::amdgpu_waves_per_eu(1, 2, 3)]]) // expected-error {{'amdgpu_waves_per_eu' attribute takes no more than 2 arguments}} + {} + #pragma omp target ompx_attribute([[clang::unknown]]) // expected-warning {{'ompx_attribute' clause only allows 'amdgpu_flat_work_group_size', 'amdgpu_waves_per_eu', and 'launch_bounds'; 'unknown' is ignored}} + {} + #pragma omp target ompx_attribute(baz) // expected-error {{expected ')'}} expected-note {{to match this '('}} + {} + #pragma omp target ompx_attribute(__attribute__((launch_bounds(1)))) + {} + #pragma omp target ompx_attribute(__attribute__((launch_bounds(bad)))) // expected-error {{'launch_bounds' attribute requires parameter 0 to be an integer constant}} + {} + #pragma omp target ompx_attribute(__attribute__((launch_bounds(1, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-error {{expected ')'}} 
expected-error {{expected ')'}} expected-note {{to match this '('}} + {} + #pragma omp target ompx_attribute(__attribute__((launch_bounds(1, 2 // expected-error {{expected ')'}} expected-error {{expected ')'}} expected-error {{expected ')'}} expected-error {{expected ')'}} expected-note {{to match this '('}} + {} + #pragma omp target ompx_attribute(__attribute__((launch_bounds(1, 2) // expected-error {{expected ')'}} expected-error {{expected ')'}} expected-error {{expected ')'}} expected-note {{to match this '('}} + {} + #pragma omp target ompx_attribute(__attribute__((launch_bounds(1, 2)) // expected-error {{expected ')'}} expected-error {{expected ')'}} expected-note {{to match this '('}} + {} + #pragma omp target ompx_attribute(__attribute__((launch_bounds(1, 2))) // expected-error {{expected ')'}} expected-note {{to match this '('}} + {} +} Index: clang/tools/libclang/CIndex.cpp =================================================================== --- clang/tools/libclang/CIndex.cpp +++ clang/tools/libclang/CIndex.cpp @@ -2720,6 +2720,8 @@ void OMPClauseEnqueue::VisitOMPDoacrossClause(const OMPDoacrossClause *C) { VisitOMPClauseList(C); } +void OMPClauseEnqueue::VisitOMPXAttributeClause(const OMPXAttributeClause *C) { +} } // namespace Index: llvm/include/llvm/Frontend/OpenMP/OMP.td =================================================================== --- llvm/include/llvm/Frontend/OpenMP/OMP.td +++ llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -448,6 +448,10 @@ let clangClass = "OMPDoacrossClause"; } +def OMPC_OMPX_Attribute : Clause<"ompx_attribute"> { + let clangClass = "OMPXAttributeClause"; +} + //===----------------------------------------------------------------------===// // Definition of OpenMP directives //===----------------------------------------------------------------------===// @@ -460,7 +464,8 @@ VersionedClause, VersionedClause, VersionedClause, - VersionedClause + VersionedClause, + VersionedClause, ]; let allowedOnceClauses = [ VersionedClause, 
@@ -645,7 +650,8 @@ VersionedClause, VersionedClause, VersionedClause, - VersionedClause + VersionedClause, + VersionedClause, ]; let allowedOnceClauses = [ VersionedClause, @@ -661,7 +667,8 @@ VersionedClause, VersionedClause, VersionedClause, - VersionedClause + VersionedClause, + VersionedClause, ]; let allowedOnceClauses = [ VersionedClause, @@ -744,7 +751,8 @@ VersionedClause, VersionedClause, VersionedClause, - VersionedClause + VersionedClause, + VersionedClause, ]; let allowedOnceClauses = [ VersionedClause, @@ -779,7 +787,8 @@ VersionedClause, VersionedClause, VersionedClause, - VersionedClause + VersionedClause, + VersionedClause, ]; let allowedOnceClauses = [ VersionedClause, @@ -844,7 +853,8 @@ VersionedClause, VersionedClause, VersionedClause, - VersionedClause + VersionedClause, + VersionedClause, ]; } def OMP_ParallelDo : Directive<"parallel do"> { @@ -889,7 +899,8 @@ VersionedClause, VersionedClause, VersionedClause, - VersionedClause + VersionedClause, + VersionedClause, ]; } def OMP_ParallelDoSimd : Directive<"parallel do simd"> { @@ -929,7 +940,8 @@ VersionedClause, VersionedClause, VersionedClause, - VersionedClause + VersionedClause, + VersionedClause, ]; } def OMP_ParallelMasked : Directive<"parallel masked"> { @@ -944,7 +956,8 @@ VersionedClause, VersionedClause, VersionedClause, - VersionedClause + VersionedClause, + VersionedClause, ]; } def OMP_ParallelSections : Directive<"parallel sections"> { @@ -958,7 +971,8 @@ VersionedClause, VersionedClause, VersionedClause, - VersionedClause + VersionedClause, + VersionedClause, ]; let allowedOnceClauses = [ VersionedClause @@ -1127,7 +1141,8 @@ VersionedClause, VersionedClause, VersionedClause, - VersionedClause + VersionedClause, + VersionedClause, ]; } def OMP_DistributeParallelDo : Directive<"distribute parallel do"> { @@ -1174,7 +1189,8 @@ VersionedClause, VersionedClause, VersionedClause, - VersionedClause + VersionedClause, + VersionedClause, ]; } def OMP_DistributeParallelDoSimd : 
Directive<"distribute parallel do simd"> { @@ -1256,7 +1272,8 @@ VersionedClause, VersionedClause, VersionedClause, - VersionedClause + VersionedClause, + VersionedClause, ]; let allowedOnceClauses = [ VersionedClause, @@ -1309,7 +1326,8 @@ VersionedClause, VersionedClause, VersionedClause, - VersionedClause + VersionedClause, + VersionedClause, ]; let allowedOnceClauses = [ VersionedClause, @@ -1337,7 +1355,8 @@ VersionedClause, VersionedClause, VersionedClause, - VersionedClause + VersionedClause, + VersionedClause, ]; } def OMP_TeamsDistributeSimd : Directive<"teams distribute simd"> { @@ -1350,7 +1369,8 @@ VersionedClause, VersionedClause, VersionedClause, - VersionedClause + VersionedClause, + VersionedClause, ]; let allowedOnceClauses = [ VersionedClause, @@ -1388,7 +1408,8 @@ VersionedClause, VersionedClause, VersionedClause, - VersionedClause + VersionedClause, + VersionedClause, ]; } def OMP_TeamsDistributeParallelDoSimd : @@ -1438,7 +1459,8 @@ VersionedClause, VersionedClause, VersionedClause, - VersionedClause + VersionedClause, + VersionedClause, ]; } def OMP_TeamsDistributeParallelDo : @@ -1479,7 +1501,8 @@ VersionedClause, VersionedClause, VersionedClause, - VersionedClause + VersionedClause, + VersionedClause, ]; let allowedOnceClauses = [ @@ -1505,7 +1528,8 @@ VersionedClause, VersionedClause, VersionedClause, - VersionedClause + VersionedClause, + VersionedClause, ]; let allowedOnceClauses = [ VersionedClause, @@ -1546,7 +1570,8 @@ VersionedClause, VersionedClause, VersionedClause, - VersionedClause + VersionedClause, + VersionedClause, ]; let allowedOnceClauses = [ VersionedClause, @@ -1617,7 +1642,8 @@ VersionedClause, VersionedClause, VersionedClause, - VersionedClause + VersionedClause, + VersionedClause, ]; let allowedOnceClauses = [ VersionedClause, @@ -1678,7 +1704,8 @@ VersionedClause, VersionedClause, VersionedClause, - VersionedClause + VersionedClause, + VersionedClause, ]; let allowedOnceClauses = [ VersionedClause, @@ -1773,7 +1800,8 
@@ VersionedClause, VersionedClause, VersionedClause, - VersionedClause + VersionedClause, + VersionedClause, ]; } def OMP_ParallelMaskedTaskloop : @@ -1798,7 +1826,8 @@ VersionedClause, VersionedClause, VersionedClause, - VersionedClause + VersionedClause, + VersionedClause, ]; } def OMP_MasterTaskloopSimd : Directive<"master taskloop simd"> { @@ -1883,7 +1912,8 @@ VersionedClause, VersionedClause, VersionedClause, - VersionedClause + VersionedClause, + VersionedClause, ]; } def OMP_ParallelMaskedTaskloopSimd : @@ -1914,7 +1944,8 @@ VersionedClause, VersionedClause, VersionedClause, - VersionedClause + VersionedClause, + VersionedClause, ]; } def OMP_Depobj : Directive<"depobj"> { @@ -2021,6 +2052,7 @@ VersionedClause, VersionedClause, VersionedClause, + VersionedClause, ]; let allowedOnceClauses = [ VersionedClause, @@ -2045,7 +2077,8 @@ VersionedClause, VersionedClause, VersionedClause, - VersionedClause + VersionedClause, + VersionedClause, ]; let allowedOnceClauses = [ VersionedClause, @@ -2068,6 +2101,7 @@ VersionedClause, VersionedClause, VersionedClause, + VersionedClause, ]; let allowedOnceClauses = [ VersionedClause, @@ -2094,6 +2128,7 @@ VersionedClause, VersionedClause, VersionedClause, + VersionedClause, ]; let allowedOnceClauses = [ VersionedClause,