Index: lib/CodeGen/CGDecl.cpp =================================================================== --- lib/CodeGen/CGDecl.cpp +++ lib/CodeGen/CGDecl.cpp @@ -2573,5 +2573,5 @@ } void CodeGenModule::EmitOMPRequiresDecl(const OMPRequiresDecl *D) { - getOpenMPRuntime().checkArchForUnifiedAddressing(D); + getOpenMPRuntime().checkArchForUnifiedAddressing(*this, D); } Index: lib/CodeGen/CGOpenMPRuntime.h =================================================================== --- lib/CodeGen/CGOpenMPRuntime.h +++ lib/CodeGen/CGOpenMPRuntime.h @@ -636,6 +636,9 @@ /// must be emitted. llvm::SmallDenseSet<const VarDecl *> DeferredGlobalVariables; + /// Creates and registers requires directives. + llvm::Function *createRequiresDirectiveRegistration(); + /// Creates and registers offloading binary descriptor for the current /// compilation unit. The function that does the registration is returned. llvm::Function *createOffloadingBinaryDescriptorRegistration(); @@ -1429,6 +1432,10 @@ /// \param GD Global to scan. virtual bool emitTargetGlobal(GlobalDecl GD); + /// Creates and returns a registration function for when at least one + /// requires directive was used in the current module. + virtual llvm::Function *emitRequiresDirectiveRegFun(); + /// Creates the offloading descriptor in the event any target region /// was emitted in the current module and return the function that registers /// it. 
@@ -1597,7 +1604,8 @@ /// Perform check on requires decl to ensure that target architecture /// supports unified addressing - virtual void checkArchForUnifiedAddressing(const OMPRequiresDecl *D) const {} + virtual void checkArchForUnifiedAddressing(CodeGenModule &CGM, + const OMPRequiresDecl *D) const; /// Checks if the variable has associated OMPAllocateDeclAttr attribute with /// the predefined allocator and translates it into the corresponding address Index: lib/CodeGen/CGOpenMPRuntime.cpp =================================================================== --- lib/CodeGen/CGOpenMPRuntime.cpp +++ lib/CodeGen/CGOpenMPRuntime.cpp @@ -694,6 +694,8 @@ // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); OMPRTL__tgt_target_teams_nowait, + // Call to void __tgt_register_requires(int64_t flags); + OMPRTL__tgt_register_requires, // Call to void __tgt_register_lib(__tgt_bin_desc *desc); OMPRTL__tgt_register_lib, // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc); @@ -721,6 +723,25 @@ OMPRTL__tgt_target_data_update_nowait, }; +enum OpenMPOffloadingRequiresDirFlags : int64_t { + /// no requires directive present. + OMP_REQ_NONE = 0x000, + /// reverse_offload clause. + OMP_REQ_REVERSE_OFFLOAD = 0x001, + /// unified_address clause. + OMP_REQ_UNIFIED_ADDRESS = 0x002, + /// unified_shared_memory clause. + OMP_REQ_UNIFIED_SHARED_MEMORY = 0x004, + /// atomic_default_mem_order seq_cst clause. + OMP_REQ_ATOMIC_DEFAULT_SEQ_CST = 0x008, + /// atomic_default_mem_order acq_rel clause. + OMP_REQ_ATOMIC_DEFAULT_ACQ_REL = 0x010, + /// atomic_default_mem_order relaxed clause. + OMP_REQ_ATOMIC_DEFAULT_RELAXED = 0x020, + /// dynamic_allocators clause. + OMP_REQ_DYNAMIC_ALLOCATORS = 0x040 +}; + /// A basic class for pre|post-action for advanced codegen sequence for OpenMP /// region. 
class CleanupTy final : public EHScopeStack::Cleanup { @@ -2294,6 +2315,14 @@ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait"); break; } + case OMPRTL__tgt_register_requires: { + // Build void __tgt_register_requires(int64_t flags); + llvm::Type *TypeParams[] = {CGM.Int64Ty}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires"); + break; + } case OMPRTL__tgt_register_lib: { // Build void __tgt_register_lib(__tgt_bin_desc *desc); QualType ParamTy = @@ -3838,6 +3867,36 @@ } llvm::Function * +CGOpenMPRuntime::createRequiresDirectiveRegistration() { + // If we don't have entries or if we are emitting code for the device, we + // don't need to do anything. + if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty()) + return nullptr; + + ASTContext &C = CGM.getContext(); + + llvm::Function *RequiresRegFn; + { + CodeGenFunction CGF(CGM); + const auto &FI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, {}); + llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); + std::string ReqName = getName({"omp_offloading", "requires_reg"}); + RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI); + CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); + int64_t Flags = OMP_REQ_NONE; + // TODO: check for other requires clauses. + if (CGF.CGM.HasRequiresUnifiedSharedMemory) { + Flags |= OMP_REQ_UNIFIED_SHARED_MEMORY; + } + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires), + llvm::ConstantInt::get(CGF.CGM.Int64Ty, Flags)); + CGF.FinishFunction(); + } + return RequiresRegFn; +} + +llvm::Function * CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { // If we don't have entries or if we are emitting code for the device, we // don't need to do anything. 
@@ -7921,6 +7980,10 @@ MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types) const { + // If using unified memory, no need to do the mappings. + if (CGF.CGM.HasRequiresUnifiedSharedMemory) + return; + // Map other list items in the map clause which are not captured variables // but "declare target link" global variables. for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) { @@ -8935,6 +8998,16 @@ " Expected target-based directive."); } +void CGOpenMPRuntime::checkArchForUnifiedAddressing( + CodeGenModule &CGM, const OMPRequiresDecl *D) const { + for (const OMPClause *Clause : D->clauselists()) { + if (Clause->getClauseKind() == OMPC_unified_shared_memory) { + CGM.HasRequiresUnifiedSharedMemory = true; + break; + } + } +} + bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS) { if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) @@ -8993,6 +9066,11 @@ return !AlreadyEmittedTargetFunctions.insert(Name).second; } +llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { + // Create and register the function that handles the requires directives. + return createRequiresDirectiveRegistration(); +} + llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() { // If we have offloading in the current module, we need to emit the entries // now and register the offloading descriptor. Index: lib/CodeGen/CGOpenMPRuntimeNVPTX.h =================================================================== --- lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ lib/CodeGen/CGOpenMPRuntimeNVPTX.h @@ -383,7 +383,8 @@ /// Perform check on requires decl to ensure that target architecture /// supports unified addressing - void checkArchForUnifiedAddressing(const OMPRequiresDecl *D) const override; + void checkArchForUnifiedAddressing(CodeGenModule &CGM, + const OMPRequiresDecl *D) const override; /// Returns default address space for the constant firstprivates, __constant__ /// address space by default. 
Index: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp =================================================================== --- lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -4942,7 +4942,8 @@ /// Check to see if target architecture supports unified addressing which is /// a restriction for OpenMP requires clause "unified_shared_memory". void CGOpenMPRuntimeNVPTX::checkArchForUnifiedAddressing( - const OMPRequiresDecl *D) const { + CodeGenModule &CGM, const OMPRequiresDecl *D) const { + CGOpenMPRuntime::checkArchForUnifiedAddressing(CGM, D); for (const OMPClause *Clause : D->clauselists()) { if (Clause->getClauseKind() == OMPC_unified_shared_memory) { switch (getCudaArch(CGM)) { Index: lib/CodeGen/CodeGenModule.h =================================================================== --- lib/CodeGen/CodeGenModule.h +++ lib/CodeGen/CodeGenModule.h @@ -291,6 +291,8 @@ typedef std::vector<Structor> CtorList; + bool HasRequiresUnifiedSharedMemory = false; + private: ASTContext &Context; const LangOptions &LangOpts; Index: lib/CodeGen/CodeGenModule.cpp =================================================================== --- lib/CodeGen/CodeGenModule.cpp +++ lib/CodeGen/CodeGenModule.cpp @@ -410,6 +410,10 @@ AddGlobalCtor(CudaCtorFunction); } if (OpenMPRuntime) { + if (llvm::Function *OpenMPRequiresDirectiveRegFun = + OpenMPRuntime->emitRequiresDirectiveRegFun()) { + AddGlobalCtor(OpenMPRequiresDirectiveRegFun, 0, nullptr); + } if (llvm::Function *OpenMPRegistrationFunction = OpenMPRuntime->emitRegistrationFunction()) { auto ComdatKey = OpenMPRegistrationFunction->hasComdat() ? 
Index: test/OpenMP/openmp_offload_registration.cpp =================================================================== --- test/OpenMP/openmp_offload_registration.cpp +++ test/OpenMP/openmp_offload_registration.cpp @@ -26,7 +26,7 @@ // CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 2, [[DEVTY]]* getelementptr inbounds ([2 x [[DEVTY]]], [2 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }, comdat($[[REGFN]]) // Check target registration is registered as a Ctor. -// CHECK: appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* @[[REGFN]], i8* bitcast (void ()* @[[REGFN]] to i8*) }] +// CHECK: appending global [2 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* @.omp_offloading.requires_reg, i8* null }, { i32, void ()*, i8* } { i32 0, void ()* @[[REGFN]], i8* bitcast (void ()* @[[REGFN]] to i8*) }] // Check presence of foo() and the outlined target region // CHECK: define void [[FOO:@.+]]() @@ -34,6 +34,11 @@ // Check registration and unregistration code. +// CHECK: define internal void @.omp_offloading.requires_reg() +// CHECK: call void @__tgt_register_requires(i64 0) +// CHECK: ret void +// CHECK: declare void @__tgt_register_requires(i64) + // CHECK: define internal void @[[UNREGFN:.+]](i8*) // CHECK-SAME: comdat($[[REGFN]]) { // CHECK: call i32 @__tgt_unregister_lib([[DSCTY]]* [[DESC]])