Index: lib/CodeGen/CGOpenMPRuntime.h
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.h
+++ lib/CodeGen/CGOpenMPRuntime.h
@@ -636,6 +636,10 @@
   /// must be emitted.
   llvm::SmallDenseSet DeferredGlobalVariables;
 
+  /// Flag for keeping track of whether a requires unified_shared_memory
+  /// directive is present.
+  bool HasRequiresUnifiedSharedMemory = false;
+
   /// Creates and registers offloading binary descriptor for the current
   /// compilation unit. The function that does the registration is returned.
   llvm::Function *createOffloadingBinaryDescriptorRegistration();
@@ -1429,6 +1433,10 @@
   /// \param GD Global to scan.
   virtual bool emitTargetGlobal(GlobalDecl GD);
 
+  /// Creates and returns a registration function for when at least one
+  /// requires directive was used in the current module.
+  llvm::Function *emitRequiresDirectiveRegFun();
+
   /// Creates the offloading descriptor in the event any target region
   /// was emitted in the current module and return the function that registers
   /// it.
@@ -1597,7 +1605,7 @@
 
   /// Perform check on requires decl to ensure that target architecture
   /// supports unified addressing
-  virtual void checkArchForUnifiedAddressing(const OMPRequiresDecl *D) const {}
+  virtual void checkArchForUnifiedAddressing(const OMPRequiresDecl *D) const;
 
   /// Checks if the variable has associated OMPAllocateDeclAttr attribute with
   /// the predefined allocator and translates it into the corresponding address
Index: lib/CodeGen/CGOpenMPRuntime.cpp
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.cpp
+++ lib/CodeGen/CGOpenMPRuntime.cpp
@@ -457,6 +457,24 @@
   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
 };
 
+namespace {
+LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
+/// Values for bit flags for marking which requires clauses have been used.
+enum OpenMPOffloadingRequiresDirFlags : int64_t {
+  /// no requires directive present.
+  OMP_REQ_NONE                  = 0x000,
+  /// reverse_offload clause.
+  OMP_REQ_REVERSE_OFFLOAD       = 0x001,
+  /// unified_address clause.
+  OMP_REQ_UNIFIED_ADDRESS       = 0x002,
+  /// unified_shared_memory clause.
+  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x004,
+  /// dynamic_allocators clause.
+  OMP_REQ_DYNAMIC_ALLOCATORS    = 0x008,
+  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
+};
+} // anonymous namespace
+
 /// Describes ident structure that describes a source location.
 /// All descriptions are taken from
 /// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
@@ -694,6 +712,8 @@
   // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
   // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
   OMPRTL__tgt_target_teams_nowait,
+  // Call to void __tgt_register_requires(int64_t flags);
+  OMPRTL__tgt_register_requires,
   // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
   OMPRTL__tgt_register_lib,
   // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
@@ -2294,6 +2314,14 @@
     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
     break;
   }
+  case OMPRTL__tgt_register_requires: {
+    // Build void __tgt_register_requires(int64_t flags);
+    llvm::Type *TypeParams[] = {CGM.Int64Ty};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires");
+    break;
+  }
   case OMPRTL__tgt_register_lib: {
     // Build void __tgt_register_lib(__tgt_bin_desc *desc);
     QualType ParamTy =
@@ -8935,6 +8963,14 @@
          " Expected target-based directive.");
 }
 
+void CGOpenMPRuntime::checkArchForUnifiedAddressing(
+    const OMPRequiresDecl *D) const {
+  for (const OMPClause *Clause : D->clauselists()) {
+    if (Clause->getClauseKind() == OMPC_unified_shared_memory)
+      CGM.getOpenMPRuntime().HasRequiresUnifiedSharedMemory = true;
+  }
+}
+
 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                        LangAS &AS) {
   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
@@ -8993,6 +9029,35 @@
   return !AlreadyEmittedTargetFunctions.insert(Name).second;
 }
 
+llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
+  // If we don't have entries or if we are emitting code for the device, we
+  // don't need to do anything.
+  if (CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
+      OffloadEntriesInfoManager.empty())
+    return nullptr;
+
+  // Create and register the function that handles the requires directives.
+  ASTContext &C = CGM.getContext();
+
+  llvm::Function *RequiresRegFn;
+  {
+    CodeGenFunction CGF(CGM);
+    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
+    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
+    std::string ReqName = getName({"omp_offloading", "requires_reg"});
+    RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI);
+    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
+    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
+    // TODO: Check for other requires clauses.
+    if (HasRequiresUnifiedSharedMemory)
+      Flags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
+    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires),
+                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
+    CGF.FinishFunction();
+  }
+  return RequiresRegFn;
+}
+
 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
   // If we have offloading in the current module, we need to emit the entries
   // now and register the offloading descriptor.
Index: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
===================================================================
--- lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -4987,6 +4987,7 @@
       }
     }
   }
+  CGOpenMPRuntime::checkArchForUnifiedAddressing(D);
 }
 
 /// Get number of SMs and number of blocks per SM.
Index: lib/CodeGen/CodeGenModule.cpp
===================================================================
--- lib/CodeGen/CodeGenModule.cpp
+++ lib/CodeGen/CodeGenModule.cpp
@@ -410,6 +410,10 @@
     AddGlobalCtor(CudaCtorFunction);
   }
   if (OpenMPRuntime) {
+    if (llvm::Function *OpenMPRequiresDirectiveRegFun =
+            OpenMPRuntime->emitRequiresDirectiveRegFun()) {
+      AddGlobalCtor(OpenMPRequiresDirectiveRegFun, 0);
+    }
     if (llvm::Function *OpenMPRegistrationFunction =
            OpenMPRuntime->emitRegistrationFunction()) {
       auto ComdatKey = OpenMPRegistrationFunction->hasComdat() ?
Index: test/OpenMP/openmp_offload_registration.cpp
===================================================================
--- test/OpenMP/openmp_offload_registration.cpp
+++ test/OpenMP/openmp_offload_registration.cpp
@@ -26,7 +26,7 @@
 // CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 2, [[DEVTY]]* getelementptr inbounds ([2 x [[DEVTY]]], [2 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }, comdat($[[REGFN]])
 
 // Check target registration is registered as a Ctor.
-// CHECK: appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* @[[REGFN]], i8* bitcast (void ()* @[[REGFN]] to i8*) }]
+// CHECK: appending global [2 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* @.omp_offloading.requires_reg, i8* null }, { i32, void ()*, i8* } { i32 0, void ()* @[[REGFN]], i8* bitcast (void ()* @[[REGFN]] to i8*) }]
 
 // Check presence of foo() and the outlined target region
 // CHECK: define void [[FOO:@.+]]()
@@ -34,6 +34,11 @@
 
 // Check registration and unregistration code.
 
+// CHECK: define internal void @.omp_offloading.requires_reg()
+// CHECK: call void @__tgt_register_requires(i64 0)
+// CHECK: ret void
+// CHECK: declare void @__tgt_register_requires(i64)
+
 // CHECK: define internal void @[[UNREGFN:.+]](i8*)
 // CHECK-SAME: comdat($[[REGFN]]) {
 // CHECK: call i32 @__tgt_unregister_lib([[DSCTY]]* [[DESC]])
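
For illustration only (not part of the patch): a minimal sketch of a host translation unit that would exercise the new registration path. The file name and offload target below are hypothetical. With a requires unified_shared_memory directive present, the emitted @.omp_offloading.requires_reg ctor would pass OMP_REQ_UNIFIED_SHARED_MEMORY (0x004) to __tgt_register_requires, rather than the i64 0 checked in the test above, which has no requires directive.

// requires_usm_example.cpp -- hypothetical example, compiled e.g. with
//   clang++ -fopenmp -fopenmp-targets=x86_64-pc-linux-gnu requires_usm_example.cpp
#include <cstdio>

// The requires directive sets HasRequiresUnifiedSharedMemory during codegen.
#pragma omp requires unified_shared_memory

int main() {
  int X = 0;
  // Any target region produces offload entries, so the host-side
  // requires-registration ctor is emitted and added via AddGlobalCtor.
  #pragma omp target map(tofrom: X)
  X += 1;
  std::printf("%d\n", X);
  return 0;
}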