diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
@@ -14,24 +14,18 @@
 #ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H
 #define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H
 
-#include "CGOpenMPRuntime.h"
+#include "CGOpenMPRuntimeTarget.h"
 #include "CodeGenFunction.h"
 #include "clang/AST/StmtOpenMP.h"
 
 namespace clang {
 namespace CodeGen {
 
-class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntime {
+class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntimeTarget {
 public:
   /// Defines the execution mode.
-  enum ExecutionMode {
-    /// SPMD execution mode (all threads are worker threads).
-    EM_SPMD,
-    /// Non-SPMD execution mode (1 master thread, others are workers).
-    EM_NonSPMD,
-    /// Unknown execution mode (orphaned directive).
-    EM_Unknown,
-  };
+  using ExecutionMode = CGOpenMPRuntimeTarget::ExecutionMode;
+
 private:
   /// Parallel outlined function work for workers to execute.
   llvm::SmallVector<llvm::Function *, 16> Work;
@@ -52,7 +46,7 @@
     void createWorkerFunction(CodeGenModule &CGM);
   };
 
-  ExecutionMode getExecutionMode() const;
+  ExecutionMode getExecutionMode() const override;
 
   bool requiresFullRuntime() const { return RequiresFullRuntime; }
 
@@ -92,12 +86,6 @@
   // Base class overrides.
   //
 
-  /// Creates offloading entry for the provided entry ID \a ID,
-  /// address \a Addr, size \a Size, and flags \a Flags.
-  void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr,
-                          uint64_t Size, int32_t Flags,
-                          llvm::GlobalValue::LinkageTypes Linkage) override;
-
   /// Emit outlined function specialized for the Fork-Join
   /// programming model for applicable target directives on the NVPTX device.
   /// \param D Directive to emit.
@@ -197,28 +185,6 @@
   explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM);
 
   void clear() override;
 
-  /// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32
-  /// global_tid, int proc_bind) to generate code for 'proc_bind' clause.
-  virtual void emitProcBindClause(CodeGenFunction &CGF,
-                                  OpenMPProcBindClauseKind ProcBind,
-                                  SourceLocation Loc) override;
-
-  /// Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
-  /// global_tid, kmp_int32 num_threads) to generate code for 'num_threads'
-  /// clause.
-  /// \param NumThreads An integer value of threads.
-  virtual void emitNumThreadsClause(CodeGenFunction &CGF,
-                                    llvm::Value *NumThreads,
-                                    SourceLocation Loc) override;
-
-  /// This function ought to emit, in the general case, a call to
-  // the openmp runtime kmpc_push_num_teams. In NVPTX backend it is not needed
-  // as these numbers are obtained through the PTX grid and block configuration.
-  /// \param NumTeams An integer expression of teams.
-  /// \param ThreadLimit An integer expression of threads.
-  void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams,
-                          const Expr *ThreadLimit, SourceLocation Loc) override;
-
   /// Emits inlined function for the specified OpenMP parallel
   //  directive.
   /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
@@ -249,18 +215,6 @@
                                  OpenMPDirectiveKind InnermostKind,
                                  const RegionCodeGenTy &CodeGen) override;
 
-  /// Emits code for teams call of the \a OutlinedFn with
-  /// variables captured in a record which address is stored in \a
-  /// CapturedStruct.
-  /// \param OutlinedFn Outlined function to be run by team masters. Type of
-  /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
-  /// \param CapturedVars A pointer to the record with the references to
-  /// variables used in \a OutlinedFn function.
-  ///
-  void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D,
-                     SourceLocation Loc, llvm::Function *OutlinedFn,
-                     ArrayRef<llvm::Value *> CapturedVars) override;
-
   /// Emits code for parallel or serial call of the \a OutlinedFn with
   /// variables captured in a record which address is stored in \a
   /// CapturedStruct.
@@ -371,25 +325,11 @@
                            const OMPLoopDirective &S,
                            OpenMPDistScheduleClauseKind &ScheduleKind,
                            llvm::Value *&Chunk) const override;
 
-  /// Choose a default value for the schedule clause.
-  void getDefaultScheduleAndChunk(CodeGenFunction &CGF,
-      const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind,
-      const Expr *&ChunkExpr) const override;
-
   /// Adjust some parameters for the target-based directives, like addresses of
   /// the variables captured by reference in lambdas.
   void adjustTargetSpecificDataForLambdas(
       CodeGenFunction &CGF, const OMPExecutableDirective &D) const override;
 
-  /// Perform check on requires decl to ensure that target architecture
-  /// supports unified addressing
-  void checkArchForUnifiedAddressing(CodeGenModule &CGM,
-                                     const OMPRequiresDecl *D) const override;
-
-  /// Returns default address space for the constant firstprivates, __constant__
-  /// address space by default.
-  unsigned getDefaultFirstprivateAddressSpace() const override;
-
 private:
   /// Track the execution mode when codegening directives within a target
   /// region. The appropriate mode (SPMD/NON-SPMD) is set on entry to the
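The header changes above replace the NVPTX-private ExecutionMode enum with an alias for the one on CGOpenMPRuntimeTarget and turn getExecutionMode() into an override of the new virtual hook. A minimal sketch of how another device runtime could plug into the same hook is shown below; the class name CGOpenMPRuntimeExampleGPU and its mode-tracking member are hypothetical and not part of this patch.

// Sketch only, assuming the CGOpenMPRuntimeTarget interface introduced later
// in this patch. A derived runtime reports its execution mode so that the
// base class can skip the proc_bind/num_threads runtime calls in SPMD regions.
#include "CGOpenMPRuntimeTarget.h"

namespace clang {
namespace CodeGen {

class CGOpenMPRuntimeExampleGPU : public CGOpenMPRuntimeTarget {
public:
  explicit CGOpenMPRuntimeExampleGPU(CodeGenModule &CGM)
      : CGOpenMPRuntimeTarget(CGM) {}

  // Report the mode of the region currently being emitted. The base-class
  // default is EM_Unknown, which keeps the generic runtime calls.
  ExecutionMode getExecutionMode() const override { return CurrentMode; }

private:
  ExecutionMode CurrentMode = EM_Unknown;
};

} // namespace CodeGen
} // namespace clang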
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -199,28 +199,6 @@
   SharedMemorySize = 128,
 };
 
-static const ValueDecl *getPrivateItem(const Expr *RefExpr) {
-  RefExpr = RefExpr->IgnoreParens();
-  if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(RefExpr)) {
-    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
-    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
-      Base = TempASE->getBase()->IgnoreParenImpCasts();
-    RefExpr = Base;
-  } else if (auto *OASE = dyn_cast<OMPArraySectionExpr>(RefExpr)) {
-    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
-    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
-      Base = TempOASE->getBase()->IgnoreParenImpCasts();
-    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
-      Base = TempASE->getBase()->IgnoreParenImpCasts();
-    RefExpr = Base;
-  }
-  RefExpr = RefExpr->IgnoreParenImpCasts();
-  if (const auto *DE = dyn_cast<DeclRefExpr>(RefExpr))
-    return cast<ValueDecl>(DE->getDecl()->getCanonicalDecl());
-  const auto *ME = cast<MemberExpr>(RefExpr);
-  return cast<ValueDecl>(ME->getMemberDecl()->getCanonicalDecl());
-}
-
 typedef std::pair<CharUnits /*Align*/, const ValueDecl *> VarsDataTy;
 static bool stable_sort_comparator(const VarsDataTy P1, const VarsDataTy P2) {
   return P1.first > P2.first;
@@ -1859,28 +1837,6 @@
   return RTLFn;
 }
 
-void CGOpenMPRuntimeNVPTX::createOffloadEntry(llvm::Constant *ID,
-                                              llvm::Constant *Addr,
-                                              uint64_t Size, int32_t,
-                                              llvm::GlobalValue::LinkageTypes) {
-  // TODO: Add support for global variables on the device after declare target
-  // support.
-  if (!isa<llvm::Function>(Addr))
-    return;
-  llvm::Module &M = CGM.getModule();
-  llvm::LLVMContext &Ctx = CGM.getLLVMContext();
-
-  // Get "nvvm.annotations" metadata node
-  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");
-
-  llvm::Metadata *MDVals[] = {
-      llvm::ConstantAsMetadata::get(Addr), llvm::MDString::get(Ctx, "kernel"),
-      llvm::ConstantAsMetadata::get(
-          llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
-  // Append metadata to nvvm.annotations
-  MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
-}
-
 void CGOpenMPRuntimeNVPTX::emitTargetOutlinedFunction(
     const OMPExecutableDirective &D, StringRef ParentName,
     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
@@ -1933,36 +1889,11 @@
 }
 
 CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM)
-    : CGOpenMPRuntime(CGM, "_", "$") {
+    : CGOpenMPRuntimeTarget(CGM) {
   if (!CGM.getLangOpts().OpenMPIsDevice)
     llvm_unreachable("OpenMP NVPTX can only handle device code.");
 }
 
-void CGOpenMPRuntimeNVPTX::emitProcBindClause(CodeGenFunction &CGF,
-                                              OpenMPProcBindClauseKind ProcBind,
-                                              SourceLocation Loc) {
-  // Do nothing in case of SPMD mode and L0 parallel.
-  if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD)
-    return;
-
-  CGOpenMPRuntime::emitProcBindClause(CGF, ProcBind, Loc);
-}
-
-void CGOpenMPRuntimeNVPTX::emitNumThreadsClause(CodeGenFunction &CGF,
-                                                llvm::Value *NumThreads,
-                                                SourceLocation Loc) {
-  // Do nothing in case of SPMD mode and L0 parallel.
-  if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD)
-    return;
-
-  CGOpenMPRuntime::emitNumThreadsClause(CGF, NumThreads, Loc);
-}
-
-void CGOpenMPRuntimeNVPTX::emitNumTeamsClause(CodeGenFunction &CGF,
-                                              const Expr *NumTeams,
-                                              const Expr *ThreadLimit,
-                                              SourceLocation Loc) {}
-
 llvm::Function *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction(
     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
@@ -2024,7 +1955,7 @@
     return;
   for (const auto *C : Dir->getClausesOfKind<OMPLastprivateClause>()) {
     for (const Expr *E : C->getVarRefs())
-      Vars.push_back(getPrivateItem(E));
+      Vars.push_back(CGOpenMPRuntimeTarget::getUnderlyingVar(E));
   }
 }
@@ -2036,7 +1967,7 @@
          "expected teams directive.");
   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
     for (const Expr *E : C->privates())
-      Vars.push_back(getPrivateItem(E));
+      Vars.push_back(CGOpenMPRuntimeTarget::getUnderlyingVar(E));
   }
 }
@@ -2460,25 +2391,6 @@
   }
 }
 
-void CGOpenMPRuntimeNVPTX::emitTeamsCall(CodeGenFunction &CGF,
-                                         const OMPExecutableDirective &D,
-                                         SourceLocation Loc,
-                                         llvm::Function *OutlinedFn,
-                                         ArrayRef<llvm::Value *> CapturedVars) {
-  if (!CGF.HaveInsertPoint())
-    return;
-
-  Address ZeroAddr = CGF.CreateMemTemp(
-      CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
-      /*Name*/ ".zero.addr");
-  CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
-  llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
-  OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer());
-  OutlinedFnArgs.push_back(ZeroAddr.getPointer());
-  OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
-  emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
-}
-
 void CGOpenMPRuntimeNVPTX::emitParallelCall(
     CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
     ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) {
@@ -4769,18 +4681,6 @@
                                              CGF, S, ScheduleKind, Chunk);
 }
 
-void CGOpenMPRuntimeNVPTX::getDefaultScheduleAndChunk(
-    CodeGenFunction &CGF, const OMPLoopDirective &S,
-    OpenMPScheduleClauseKind &ScheduleKind,
-    const Expr *&ChunkExpr) const {
-  ScheduleKind = OMPC_SCHEDULE_static;
-  // Chunk size is 1 in this case.
-  llvm::APInt ChunkSize(32, 1);
-  ChunkExpr = IntegerLiteral::Create(CGF.getContext(), ChunkSize,
-      CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
-      SourceLocation());
-}
-
 void CGOpenMPRuntimeNVPTX::adjustTargetSpecificDataForLambdas(
     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
@@ -4833,10 +4733,6 @@
   }
 }
 
-unsigned CGOpenMPRuntimeNVPTX::getDefaultFirstprivateAddressSpace() const {
-  return CGM.getContext().getTargetAddressSpace(LangAS::cuda_constant);
-}
-
 // Get current CudaArch and ignore any unknown values
 static CudaArch getCudaArch(CodeGenModule &CGM) {
   if (!CGM.getTarget().hasFeature("ptx"))
@@ -4855,56 +4751,6 @@
   return CudaArch::UNKNOWN;
 }
 
-/// Check to see if target architecture supports unified addressing which is
-/// a restriction for OpenMP requires clause "unified_shared_memory".
-void CGOpenMPRuntimeNVPTX::checkArchForUnifiedAddressing(
-    CodeGenModule &CGM, const OMPRequiresDecl *D) const {
-  for (const OMPClause *Clause : D->clauselists()) {
-    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
-      switch (getCudaArch(CGM)) {
-      case CudaArch::SM_20:
-      case CudaArch::SM_21:
-      case CudaArch::SM_30:
-      case CudaArch::SM_32:
-      case CudaArch::SM_35:
-      case CudaArch::SM_37:
-      case CudaArch::SM_50:
-      case CudaArch::SM_52:
-      case CudaArch::SM_53:
-      case CudaArch::SM_60:
-      case CudaArch::SM_61:
-      case CudaArch::SM_62:
-        CGM.Error(Clause->getBeginLoc(),
-                  "Target architecture does not support unified addressing");
-        return;
-      case CudaArch::SM_70:
-      case CudaArch::SM_72:
-      case CudaArch::SM_75:
-      case CudaArch::GFX600:
-      case CudaArch::GFX601:
-      case CudaArch::GFX700:
-      case CudaArch::GFX701:
-      case CudaArch::GFX702:
-      case CudaArch::GFX703:
-      case CudaArch::GFX704:
-      case CudaArch::GFX801:
-      case CudaArch::GFX802:
-      case CudaArch::GFX803:
-      case CudaArch::GFX810:
-      case CudaArch::GFX900:
-      case CudaArch::GFX902:
-      case CudaArch::GFX904:
-      case CudaArch::GFX906:
-      case CudaArch::GFX909:
-      case CudaArch::UNKNOWN:
-        break;
-      case CudaArch::LAST:
-        llvm_unreachable("Unexpected Cuda arch.");
-      }
-    }
-  }
-}
-
 /// Get number of SMs and number of blocks per SM.
 static std::pair<unsigned, unsigned> getSMsBlocksPerSM(CodeGenModule &CGM) {
   std::pair<unsigned, unsigned> Data;
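The getDefaultScheduleAndChunk override removed from CGOpenMPRuntimeNVPTX.cpp above reappears unchanged on the base class in CGOpenMPRuntimeTarget.cpp below: when a device loop carries no schedule clause, codegen falls back to a static schedule with chunk size 1. A purely illustrative piece of user code this default applies to (not part of the patch):

// Sketch: on an OpenMP device, the loop below has no schedule clause, so the
// default chosen in getDefaultScheduleAndChunk makes it behave as if
// schedule(static, 1) had been written explicitly.
int sum_on_device(const int *In, int N) {
  int Sum = 0;
#pragma omp target teams distribute parallel for reduction(+ : Sum)           \
    map(to : In[0 : N])
  for (int I = 0; I < N; ++I)
    Sum += In[I];
  return Sum;
}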
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeTarget.h b/clang/lib/CodeGen/CGOpenMPRuntimeTarget.h
new file mode 100644
--- /dev/null
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeTarget.h
@@ -0,0 +1,104 @@
+//===-- CGOpenMPRuntimeTarget.h --- Common OpenMP target codegen ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Code common to all OpenMP target codegens.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMETARGET_H
+#define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMETARGET_H
+
+#include "CGOpenMPRuntime.h"
+
+namespace clang {
+namespace CodeGen {
+
+struct CGOpenMPRuntimeTarget : public CGOpenMPRuntime {
+
+  explicit CGOpenMPRuntimeTarget(CodeGenModule &CGM);
+
+  /// Defines the execution mode.
+  enum ExecutionMode {
+    /// SPMD execution mode (all threads are worker threads).
+    EM_SPMD,
+    /// Non-SPMD execution mode (1 master thread, others are workers).
+    EM_NonSPMD,
+    /// Unknown execution mode (orphaned directive).
+    EM_Unknown,
+  };
+
+  /// Return the execution mode; if not overridden, this is always EM_Unknown.
+  virtual ExecutionMode getExecutionMode() const { return EM_Unknown; }
+
+  /// Return the value declaration encapsulated in the expression \p E.
+  static const ValueDecl *getUnderlyingVar(const Expr *E);
+
+  //
+  // Base class overrides.
+  //
+
+  /// Creates offloading entry for the provided entry ID \a ID,
+  /// address \a Addr, size \a Size, and flags \a Flags.
+  void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr,
+                          uint64_t Size, int32_t Flags,
+                          llvm::GlobalValue::LinkageTypes Linkage) override;
+
+  /// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32
+  /// global_tid, int proc_bind) to generate code for 'proc_bind' clause.
+  virtual void emitProcBindClause(CodeGenFunction &CGF,
+                                  OpenMPProcBindClauseKind ProcBind,
+                                  SourceLocation Loc) override;
+
+  /// Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
+  /// global_tid, kmp_int32 num_threads) to generate code for 'num_threads'
+  /// clause.
+  /// \param NumThreads An integer value of threads.
+  virtual void emitNumThreadsClause(CodeGenFunction &CGF,
+                                    llvm::Value *NumThreads,
+                                    SourceLocation Loc) override;
+
+  /// Set the number of teams to \p NumTeams and the thread limit to
+  /// \p ThreadLimit.
+  ///
+  /// \param NumTeams An integer expression of teams.
+  /// \param ThreadLimit An integer expression of threads.
+  void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams,
+                          const Expr *ThreadLimit, SourceLocation Loc) override;
+
+  /// Choose a default value for the schedule clause.
+  void getDefaultScheduleAndChunk(CodeGenFunction &CGF,
+                                  const OMPLoopDirective &S,
+                                  OpenMPScheduleClauseKind &ScheduleKind,
+                                  const Expr *&ChunkExpr) const override;
+
+  /// Emits code for teams call of the \a OutlinedFn with
+  /// variables captured in a record which address is stored in \a
+  /// CapturedStruct.
+  /// \param OutlinedFn Outlined function to be run by team masters. Type of
+  /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
+  /// \param CapturedVars A pointer to the record with the references to
+  /// variables used in \a OutlinedFn function.
+  ///
+  void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D,
+                     SourceLocation Loc, llvm::Function *OutlinedFn,
+                     ArrayRef<llvm::Value *> CapturedVars) override;
+
+  /// Returns default address space for the constant firstprivates, __constant__
+  /// address space by default.
+  unsigned getDefaultFirstprivateAddressSpace() const override;
+
+  /// Perform check on requires decl to ensure that target architecture
+  /// supports unified addressing.
+  void checkArchForUnifiedAddressing(CodeGenModule &CGM,
+                                     const OMPRequiresDecl *D) const override;
+};
+
+} // namespace CodeGen
+} // namespace clang
+
+#endif // LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMETARGET_H
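Among the hooks the new header collects is checkArchForUnifiedAddressing, whose implementation follows in CGOpenMPRuntimeTarget.cpp below: it rejects the OpenMP requires clause unified_shared_memory on devices older than sm_70. An illustrative example of user code that exercises this check (not part of the patch):

// Sketch: compiling this file for an NVPTX device below sm_70 is expected to
// hit the "Target architecture does not support unified addressing" error
// emitted by checkArchForUnifiedAddressing.
#pragma omp requires unified_shared_memory

void scale(double *Data, int N, double Factor) {
  // With unified shared memory the host pointer is usable on the device
  // directly; no explicit map clause is required.
#pragma omp target teams distribute parallel for
  for (int I = 0; I < N; ++I)
    Data[I] *= Factor;
}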
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeTarget.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeTarget.cpp
new file mode 100644
--- /dev/null
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeTarget.cpp
@@ -0,0 +1,199 @@
+//===-- CGOpenMPRuntimeTarget.cpp - Common OpenMP target codegen ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the code generation interface for OpenMP target offloading
+// through the Target Region (TRegion) interface.
+//
+// See the file comment in CGOpenMPRuntimeTarget.h for more information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CGOpenMPRuntimeTarget.h"
+#include "CodeGenFunction.h"
+#include "clang/AST/StmtVisitor.h"
+#include "clang/Basic/Cuda.h"
+
+using namespace clang;
+using namespace CodeGen;
+
+CGOpenMPRuntimeTarget::CGOpenMPRuntimeTarget(CodeGenModule &CGM)
+    : CGOpenMPRuntime(CGM, "_", "$") {
+  if (!CGM.getLangOpts().OpenMPIsDevice)
+    llvm_unreachable("Target code generation can only handle device code!");
+}
+
+const ValueDecl *CGOpenMPRuntimeTarget::getUnderlyingVar(const Expr *E) {
+  E = E->IgnoreParens();
+  if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(E)) {
+    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
+    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
+      Base = TempASE->getBase()->IgnoreParenImpCasts();
+    E = Base;
+  } else if (auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) {
+    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
+    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
+      Base = TempOASE->getBase()->IgnoreParenImpCasts();
+    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
+      Base = TempASE->getBase()->IgnoreParenImpCasts();
+    E = Base;
+  }
+  E = E->IgnoreParenImpCasts();
+  if (const auto *DE = dyn_cast<DeclRefExpr>(E))
+    return cast<ValueDecl>(DE->getDecl()->getCanonicalDecl());
+  const auto *ME = cast<MemberExpr>(E);
+  return cast<ValueDecl>(ME->getMemberDecl()->getCanonicalDecl());
+}
+
+void CGOpenMPRuntimeTarget::createOffloadEntry(
+    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t,
+    llvm::GlobalValue::LinkageTypes) {
+  // TODO: Add support for global variables on the device after declare target
+  // support.
+  if (!isa<llvm::Function>(Addr))
+    return;
+  llvm::Module &M = CGM.getModule();
+  llvm::LLVMContext &Ctx = CGM.getLLVMContext();
+
+  // Get "nvvm.annotations" metadata node
+  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");
+
+  llvm::Metadata *MDVals[] = {
+      llvm::ConstantAsMetadata::get(Addr), llvm::MDString::get(Ctx, "kernel"),
+      llvm::ConstantAsMetadata::get(
+          llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
+  // Append metadata to nvvm.annotations
+  MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
+}
+
+void CGOpenMPRuntimeTarget::emitProcBindClause(
+    CodeGenFunction &CGF, OpenMPProcBindClauseKind ProcBind,
+    SourceLocation Loc) {
+  // Do nothing in case of SPMD mode and L0 parallel.
+  if (getExecutionMode() == CGOpenMPRuntimeTarget::EM_SPMD)
+    return;
+
+  CGOpenMPRuntime::emitProcBindClause(CGF, ProcBind, Loc);
+}
+
+void CGOpenMPRuntimeTarget::emitNumThreadsClause(CodeGenFunction &CGF,
+                                                 llvm::Value *NumThreads,
+                                                 SourceLocation Loc) {
+  // Do nothing in case of SPMD mode and L0 parallel.
+  if (getExecutionMode() == CGOpenMPRuntimeTarget::EM_SPMD)
+    return;
+
+  CGOpenMPRuntime::emitNumThreadsClause(CGF, NumThreads, Loc);
+}
+
+void CGOpenMPRuntimeTarget::emitNumTeamsClause(CodeGenFunction &CGF,
+                                               const Expr *NumTeams,
+                                               const Expr *ThreadLimit,
+                                               SourceLocation Loc) {}
+
+void CGOpenMPRuntimeTarget::getDefaultScheduleAndChunk(
+    CodeGenFunction &CGF, const OMPLoopDirective &S,
+    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
+  ScheduleKind = OMPC_SCHEDULE_static;
+  // Chunk size is 1 in this case.
+  llvm::APInt ChunkSize(32, 1);
+  ChunkExpr = IntegerLiteral::Create(
+      CGF.getContext(), ChunkSize,
+      CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ 0),
+      SourceLocation());
+}
+
+void CGOpenMPRuntimeTarget::emitTeamsCall(
+    CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc,
+    llvm::Function *OutlinedFn, ArrayRef<llvm::Value *> CapturedVars) {
+  if (!CGF.HaveInsertPoint())
+    return;
+
+  Address ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
+  Address ZeroAddr = CGF.CreateMemTemp(
+      CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
+      /*Name*/ ".zero.addr");
+  CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
+  llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
+  OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
+  OutlinedFnArgs.push_back(ZeroAddr.getPointer());
+  OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
+
+  emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
+}
+
+unsigned CGOpenMPRuntimeTarget::getDefaultFirstprivateAddressSpace() const {
+  return CGM.getContext().getTargetAddressSpace(LangAS::cuda_constant);
+}
+
+// Get current CudaArch and ignore any unknown values
+static CudaArch getCudaArch(CodeGenModule &CGM) {
+  if (!CGM.getTarget().hasFeature("ptx"))
+    return CudaArch::UNKNOWN;
+  llvm::StringMap<bool> Features;
+  CGM.getTarget().initFeatureMap(Features, CGM.getDiags(),
+                                 CGM.getTarget().getTargetOpts().CPU,
+                                 CGM.getTarget().getTargetOpts().Features);
+  for (const auto &Feature : Features) {
+    if (Feature.getValue()) {
+      CudaArch Arch = StringToCudaArch(Feature.getKey());
+      if (Arch != CudaArch::UNKNOWN)
+        return Arch;
+    }
+  }
+  return CudaArch::UNKNOWN;
+}
+
+/// Check to see if target architecture supports unified addressing which is
+/// a restriction for OpenMP requires clause "unified_shared_memory".
+void CGOpenMPRuntimeTarget::checkArchForUnifiedAddressing(
+    CodeGenModule &CGM, const OMPRequiresDecl *D) const {
+  for (const OMPClause *Clause : D->clauselists()) {
+    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
+      switch (getCudaArch(CGM)) {
+      case CudaArch::SM_20:
+      case CudaArch::SM_21:
+      case CudaArch::SM_30:
+      case CudaArch::SM_32:
+      case CudaArch::SM_35:
+      case CudaArch::SM_37:
+      case CudaArch::SM_50:
+      case CudaArch::SM_52:
+      case CudaArch::SM_53:
+      case CudaArch::SM_60:
+      case CudaArch::SM_61:
+      case CudaArch::SM_62:
+        CGM.Error(Clause->getBeginLoc(),
+                  "Target architecture does not support unified addressing");
+        return;
+      case CudaArch::SM_70:
+      case CudaArch::SM_72:
+      case CudaArch::SM_75:
+      case CudaArch::GFX600:
+      case CudaArch::GFX601:
+      case CudaArch::GFX700:
+      case CudaArch::GFX701:
+      case CudaArch::GFX702:
+      case CudaArch::GFX703:
+      case CudaArch::GFX704:
+      case CudaArch::GFX801:
+      case CudaArch::GFX802:
+      case CudaArch::GFX803:
+      case CudaArch::GFX810:
+      case CudaArch::GFX900:
+      case CudaArch::GFX902:
+      case CudaArch::GFX904:
+      case CudaArch::GFX906:
+      case CudaArch::GFX909:
+      case CudaArch::UNKNOWN:
+        break;
+      case CudaArch::LAST:
+        llvm_unreachable("Unexpected Cuda arch.");
+      }
+    }
+  }
+}
diff --git a/clang/lib/CodeGen/CMakeLists.txt b/clang/lib/CodeGen/CMakeLists.txt
--- a/clang/lib/CodeGen/CMakeLists.txt
+++ b/clang/lib/CodeGen/CMakeLists.txt
@@ -69,6 +69,7 @@
   CGOpenCLRuntime.cpp
   CGOpenMPRuntime.cpp
   CGOpenMPRuntimeNVPTX.cpp
+  CGOpenMPRuntimeTarget.cpp
   CGRecordLayoutBuilder.cpp
   CGStmt.cpp
   CGStmtOpenMP.cpp
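As the comment removed from CGOpenMPRuntimeNVPTX.h notes, a host runtime would emit __kmpc_push_num_teams for the num_teams and thread_limit clauses, but on the device these values come from the grid and block launch configuration, so the base-class emitNumTeamsClause in CGOpenMPRuntimeTarget.cpp above is intentionally empty. A user-level illustration (not part of the patch):

// Sketch: the clauses below shape the kernel launch configuration chosen by
// the host; with the empty emitNumTeamsClause above, no __kmpc_push_num_teams
// call is generated in the device code for them.
void init_on_device(float *Out, int N) {
#pragma omp target teams distribute parallel for num_teams(128)               \
    thread_limit(64) map(from : Out[0 : N])
  for (int I = 0; I < N; ++I)
    Out[I] = 0.0f;
}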