Index: libomptarget/include/omptarget.h =================================================================== --- libomptarget/include/omptarget.h +++ libomptarget/include/omptarget.h @@ -60,6 +60,23 @@ OMP_DECLARE_TARGET_DTOR = 0x04 }; +enum OpenMPOffloadingRequiresDirFlags { + /// reverse_offload clause. + OMP_REQ_REVERSE_OFFLOAD = 0x001, + /// unified_address clause. + OMP_REQ_UNIFIED_ADDRESS = 0x002, + /// unified_shared_memory clause. + OMP_REQ_UNIFIED_SHARED_MEMORY = 0x004, + /// atomic_default_mem_order seq_cst clause. + OMP_REQ_ATOMIC_DEFAULT_SEQ_CST = 0x008, + /// atomic_default_mem_order acq_rel clause. + OMP_REQ_ATOMIC_DEFAULT_ACQ_REL = 0x010, + /// atomic_default_mem_order relaxed clause. + OMP_REQ_ATOMIC_DEFAULT_RELAXED = 0x020, + /// dynamic_allocators clause. + OMP_REQ_DYNAMIC_ALLOCATORS = 0x040 +}; + /// This struct is a record of an entry point or global. For a function /// entry point the size is expected to be zero struct __tgt_offload_entry { @@ -113,6 +130,9 @@ size_t device_offset, int device_num); int omp_target_disassociate_ptr(void *host_ptr, int device_num); +/// add the clauses of the requires directives in a given file +void __tgt_register_requires(int64_t flags); + /// adds a target shared library to the target execution image void __tgt_register_lib(__tgt_bin_desc *desc); Index: libomptarget/include/omptargetplugin.h =================================================================== --- libomptarget/include/omptargetplugin.h +++ libomptarget/include/omptargetplugin.h @@ -31,6 +31,9 @@ // having to load the library, which can be expensive. int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image); +// Initialize the requires flags for the device. +void __tgt_rtl_init_requires(int64_t RequiresFlags); + // Initialize the specified device. In case of success return 0; otherwise // return an error code. 
int32_t __tgt_rtl_init_device(int32_t ID); Index: libomptarget/plugins/cuda/src/rtl.cpp =================================================================== --- libomptarget/plugins/cuda/src/rtl.cpp +++ libomptarget/plugins/cuda/src/rtl.cpp @@ -111,6 +111,9 @@ int EnvNumTeams; int EnvTeamLimit; + // OpenMP Requires Flags + int64_t RequiresFlags; + //static int EnvNumThreads; static const int HardTeamLimit = 1<<16; // 64k static const int HardThreadLimit = 1024; @@ -227,6 +230,8 @@ } else { EnvNumTeams = -1; } + + RequiresFlags = 0; } ~RTLDeviceInfoTy() { @@ -264,6 +269,11 @@ int32_t __tgt_rtl_number_of_devices() { return DeviceInfo.NumberOfDevices; } +void __tgt_rtl_init_requires(int64_t RequiresFlags) { + DP("Init requires flags to %ld\n", RequiresFlags); + DeviceInfo.RequiresFlags = RequiresFlags; +} + int32_t __tgt_rtl_init_device(int32_t device_id) { CUdevice cuDevice; @@ -435,6 +445,18 @@ DP("Entry point " DPxMOD " maps to global %s (" DPxMOD ")\n", DPxPTR(e - HostBegin), e->name, DPxPTR(cuptr)); entry.addr = (void *)cuptr; + if (DeviceInfo.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && + e->flags & OMP_DECLARE_TARGET_LINK) { + // By default, we handle declare target link global variables as if + // unified memory can be used. If at target region launch it turns + // out that unified memory is not available then this value will + // be overwritten with the device address of the device-allocated + // variable. 
+ cuMemcpyHtoD(cuptr, e->addr, sizeof(void *)); + DP("Copy linked variable host address (" DPxMOD ")" + "to device address (" DPxMOD ")\n", + DPxPTR(*((void**)e->addr)), DPxPTR(cuptr)); + } DeviceInfo.addOffloadEntry(device_id, entry); Index: libomptarget/plugins/exports =================================================================== --- libomptarget/plugins/exports +++ libomptarget/plugins/exports @@ -2,6 +2,7 @@ global: __tgt_rtl_is_valid_binary; __tgt_rtl_number_of_devices; + __tgt_rtl_init_requires; __tgt_rtl_init_device; __tgt_rtl_load_binary; __tgt_rtl_data_alloc; Index: libomptarget/src/device.h =================================================================== --- libomptarget/src/device.h +++ libomptarget/src/device.h @@ -98,11 +98,13 @@ uint64_t loopTripCnt; + int64_t RTLRequiresFlags; + DeviceTy(RTLInfoTy *RTL) : DeviceID(-1), RTL(RTL), RTLDeviceID(-1), IsInit(false), InitFlag(), HasPendingGlobals(false), HostDataToTargetMap(), PendingCtorsDtors(), ShadowPtrMap(), DataMapMtx(), PendingGlobalsMtx(), - ShadowMtx(), loopTripCnt(0) {} + ShadowMtx(), loopTripCnt(0), RTLRequiresFlags(0) {} // The existence of mutexes makes DeviceTy non-copyable. We need to // provide a copy constructor and an assignment operator explicitly. 
@@ -112,7 +114,8 @@ HostDataToTargetMap(d.HostDataToTargetMap), PendingCtorsDtors(d.PendingCtorsDtors), ShadowPtrMap(d.ShadowPtrMap), DataMapMtx(), PendingGlobalsMtx(), - ShadowMtx(), loopTripCnt(d.loopTripCnt) {} + ShadowMtx(), loopTripCnt(d.loopTripCnt), + RTLRequiresFlags(d.RTLRequiresFlags) {} DeviceTy& operator=(const DeviceTy &d) { DeviceID = d.DeviceID; @@ -124,6 +127,7 @@ PendingCtorsDtors = d.PendingCtorsDtors; ShadowPtrMap = d.ShadowPtrMap; loopTripCnt = d.loopTripCnt; + RTLRequiresFlags = d.RTLRequiresFlags; return *this; } Index: libomptarget/src/device.cpp =================================================================== --- libomptarget/src/device.cpp +++ libomptarget/src/device.cpp @@ -152,7 +152,7 @@ // Used by target_data_begin // Return the target pointer begin (where the data will be moved). -// Allocate memory if this is the first occurrence if this mapping. +// Allocate memory if this is the first occurrence of this mapping. // Increment the reference counter. // If NULL is returned, then either data allocation failed or the user tried // to do an illegal mapping. @@ -275,6 +275,7 @@ /// Init device, should not be called directly. 
void DeviceTy::init() { + RTL->init_requires(RTLRequiresFlags); int32_t rc = RTL->init_device(RTLDeviceID); if (rc == OFFLOAD_SUCCESS) { IsInit = true; Index: libomptarget/src/exports =================================================================== --- libomptarget/src/exports +++ libomptarget/src/exports @@ -1,5 +1,6 @@ VERS1.0 { global: + __tgt_register_requires; __tgt_register_lib; __tgt_unregister_lib; __tgt_target_data_begin; Index: libomptarget/src/interface.cpp =================================================================== --- libomptarget/src/interface.cpp +++ libomptarget/src/interface.cpp @@ -57,7 +57,7 @@ } break; case tgt_default: - FATAL_MESSAGE0(1, "default offloading policy must switched to " + FATAL_MESSAGE0(1, "default offloading policy must switched to " "mandatory or disabled"); break; case tgt_mandatory: @@ -69,6 +69,12 @@ } //////////////////////////////////////////////////////////////////////////////// +/// adds requires flags +EXTERN void __tgt_register_requires(int64_t flags) { + RTLs.RegisterRequires(flags); +} + +//////////////////////////////////////////////////////////////////////////////// /// adds a target shared library to the target execution image EXTERN void __tgt_register_lib(__tgt_bin_desc *desc) { RTLs.RegisterLib(desc); Index: libomptarget/src/rtl.h =================================================================== --- libomptarget/src/rtl.h +++ libomptarget/src/rtl.h @@ -36,6 +36,7 @@ int32_t); typedef int32_t(run_team_region_ty)(int32_t, void *, void **, ptrdiff_t *, int32_t, int32_t, int32_t, uint64_t); + typedef int32_t(init_requires_ty)(int64_t); int32_t Idx; // RTL index, index is the number of devices // of other RTLs that were registered before, @@ -60,6 +61,7 @@ data_delete_ty *data_delete; run_region_ty *run_region; run_team_region_ty *run_team_region; + init_requires_ty *init_requires; // Are there images associated with this RTL. 
bool isUsed; @@ -78,8 +80,8 @@ #endif is_valid_binary(0), number_of_devices(0), init_device(0), load_binary(0), data_alloc(0), data_submit(0), data_retrieve(0), - data_delete(0), run_region(0), run_team_region(0), isUsed(false), - Mtx() {} + data_delete(0), run_region(0), run_team_region(0), + init_requires(0), isUsed(false), Mtx() {} RTLInfoTy(const RTLInfoTy &r) : Mtx() { Idx = r.Idx; @@ -98,6 +100,7 @@ data_delete = r.data_delete; run_region = r.run_region; run_team_region = r.run_team_region; + init_requires = r.init_requires; isUsed = r.isUsed; } }; @@ -118,9 +121,14 @@ // binaries. std::vector<RTLInfoTy *> UsedRTLs; + int64_t RequiresFlags; + explicit RTLsTy() {} + // Register the clauses of the requires directive. + void RegisterRequires(int64_t flags); + // Register a shared library with all (compatible) RTLs. void RegisterLib(__tgt_bin_desc *desc); // Unregister a shared library from all RTLs. Index: libomptarget/src/rtl.cpp =================================================================== --- libomptarget/src/rtl.cpp +++ libomptarget/src/rtl.cpp @@ -106,6 +106,9 @@ if (!(*((void**) &R.run_team_region) = dlsym( dynlib_handle, "__tgt_rtl_run_target_team_region"))) continue; + if (!(*((void**) &R.init_requires) = dlsym( + dynlib_handle, "__tgt_rtl_init_requires"))) + continue; // No devices are supported by this RTL? if (!(R.NumberOfDevices = R.number_of_devices())) { @@ -186,6 +189,10 @@ } } +void RTLsTy::RegisterRequires(int64_t flags) { + RequiresFlags = flags; +} + void RTLsTy::RegisterLib(__tgt_bin_desc *desc) { // Attempt to load all plugins available in the system. std::call_once(initFlag, &RTLsTy::LoadRTLs, this); @@ -222,6 +229,8 @@ Devices[start + device_id].DeviceID = start + device_id; // RTL local device ID Devices[start + device_id].RTLDeviceID = device_id; + // RTL requires flags + Devices[start + device_id].RTLRequiresFlags = RequiresFlags; } // Initialize the index of this RTL and save it in the used RTLs.