diff --git a/openmp/docs/design/Runtimes.rst b/openmp/docs/design/Runtimes.rst --- a/openmp/docs/design/Runtimes.rst +++ b/openmp/docs/design/Runtimes.rst @@ -683,6 +683,7 @@ * ``LIBOMPTARGET_HEAP_SIZE=`` * ``LIBOMPTARGET_STACK_SIZE=`` * ``LIBOMPTARGET_SHARED_MEMORY_SIZE=`` + * ``LIBOMPTARGET_MAP_FORCE_ATOMIC=[TRUE/FALSE] (default TRUE)`` LIBOMPTARGET_DEBUG """""""""""""""""" @@ -1003,6 +1004,23 @@ Offloading + +LIBOMPTARGET_MAP_FORCE_ATOMIC +""""""""""""""""""""""""""""" + +The OpenMP standard guarantees that map clauses are atomic. However, the this +can have a drastic performance impact. Users that do not require atomic map +clauses can disable them to potentially recover lost performance. As a +consequence, users have to guarantee themselves that no two map clauses will +concurrently map the same memory. If the memory is already mapped and the +map clauses will only modify the reference counter from a non-zero count to +another non-zero count, concurrent map clauses are supported regardless of +this option. To disable forced atomic map clauses use "flase"/"FALSE" as the +value of the ``LIBOMPTARGET_MAP_FORCE_ATOMIC`` environment variable. +The default behavior of LLVM 14 is to force atomic maps clauses, prior versions +of LLVM did not. + + LLVM/OpenMP Target Host Runtime Plugins (``libomptarget.rtl.XXXX``) ------------------------------------------------------------------- diff --git a/openmp/libomptarget/include/device.h b/openmp/libomptarget/include/device.h --- a/openmp/libomptarget/include/device.h +++ b/openmp/libomptarget/include/device.h @@ -132,6 +132,10 @@ /// Get the event bound to this data map. void *getEvent() const { return States->Event; } + /// Add a new event, if necessary. + /// Returns OFFLOAD_FAIL if something went wrong, OFFLOAD_SUCCESS otherwise. + int addEventIfNecessary(DeviceTy &Device, AsyncInfoTy &AsyncInfo) const; + /// Set the event bound to this data map. void setEvent(void *Event) const { States->Event = Event; } @@ -198,16 +202,28 @@ return ThisRefCount == 1; } - void lock() const { States->UpdateMtx.lock(); } - - void unlock() const { States->UpdateMtx.unlock(); } - void setMayContainAttachedPointers() const { States->MayContainAttachedPointers = true; } bool getMayContainAttachedPointers() const { return States->MayContainAttachedPointers; } + + /// Helper to make sure the entry is locked in a scope. + /// TODO: We should generalize this and use it for all our objects that use + /// lock/unlock methods. + struct LockGuard { + const HostDataToTargetTy &Entry; + + public: + LockGuard(const HostDataToTargetTy &Entry) : Entry(Entry) { Entry.lock(); } + ~LockGuard() { Entry.unlock(); } + }; + +private: + void lock() const { States->UpdateMtx.lock(); } + + void unlock() const { States->UpdateMtx.unlock(); } }; typedef uintptr_t HstPtrBeginTy; @@ -415,6 +431,9 @@ /// Struct for the data required to handle plugins struct PluginManager { + PluginManager(bool UseEventsForAtomicTransfers) + : UseEventsForAtomicTransfers(UseEventsForAtomicTransfers) {} + /// RTLs identified on the host RTLsTy RTLs; @@ -435,6 +454,10 @@ // Store target policy (disabled, mandatory, default) kmp_target_offload_kind_t TargetOffloadPolicy = tgt_default; std::mutex TargetOffloadMtx; ///< For TargetOffloadPolicy + + /// Flag to indicate if we use events to ensure the atomicity of + /// map clauses or not. Can be modified with an environment variable. + const bool UseEventsForAtomicTransfers; }; extern PluginManager *PM; diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp --- a/openmp/libomptarget/src/device.cpp +++ b/openmp/libomptarget/src/device.cpp @@ -20,6 +20,33 @@ #include #include +int HostDataToTargetTy::addEventIfNecessary( + DeviceTy &Device, AsyncInfoTy &AsyncInfo) const { + // First, check if the user disabled atomic map transfer/malloc/dealloc. + if (!PM->UseEventsForAtomicTransfers) + return OFFLOAD_SUCCESS; + + void *Event = getEvent(); + bool NeedNewEvent = Event == nullptr; + if (NeedNewEvent && Device.createEvent(&Event) != OFFLOAD_SUCCESS) { + REPORT("Failed to create event\n"); + return OFFLOAD_FAIL; + } + + // We cannot assume the event should not be nullptr because we don't + // know if the target support event. But if a target doesn't, + // recordEvent should always return success. + if (Device.recordEvent(Event, AsyncInfo) != OFFLOAD_SUCCESS) { + REPORT("Failed to set dependence on event " DPxMOD "\n", DPxPTR(Event)); + return OFFLOAD_FAIL; + } + + if (NeedNewEvent) + setEvent(Event); + + return OFFLOAD_SUCCESS; +} + DeviceTy::DeviceTy(RTLInfoTy *RTL) : DeviceID(-1), RTL(RTL), RTLDeviceID(-1), IsInit(false), InitFlag(), HasPendingGlobals(false), HostDataToTargetMap(), PendingCtorsDtors(), @@ -261,7 +288,7 @@ if (TargetPointer && !IsHostPtr && HasFlagTo && (IsNew || HasFlagAlways)) { // Lock the entry before releasing the mapping table lock such that another // thread that could issue data movement will get the right result. - Entry->lock(); + HostDataToTargetTy::LockGuard LG(*Entry); // Release the mapping table lock right after the entry is locked. DataMapMtx.unlock(); @@ -270,38 +297,16 @@ int Ret = submitData(TargetPointer, HstPtrBegin, Size, AsyncInfo); if (Ret != OFFLOAD_SUCCESS) { - Entry->unlock(); REPORT("Copying data to device failed.\n"); // We will also return nullptr if the data movement fails because that // pointer points to a corrupted memory region so it doesn't make any // sense to continue to use it. TargetPointer = nullptr; - } - - void *Event = Entry->getEvent(); - bool NeedNewEvent = Event == nullptr; - if (NeedNewEvent && createEvent(&Event) != OFFLOAD_SUCCESS) { - Entry->unlock(); - REPORT("Failed to create event\n"); - return {{false /* IsNewEntry */, false /* IsHostPointer */}, - {} /* MapTableEntry */, - nullptr /* TargetPointer */}; - } - // We cannot assume the event should not be nullptr because we don't - // know if the target support event. But if a target doesn't, - // recordEvent should always return success. - Ret = recordEvent(Event, AsyncInfo); - if (Ret != OFFLOAD_SUCCESS) { - Entry->unlock(); - REPORT("Failed to set dependence on event " DPxMOD "\n", DPxPTR(Event)); + } else if (Entry->addEventIfNecessary(*this, AsyncInfo) != + OFFLOAD_SUCCESS) return {{false /* IsNewEntry */, false /* IsHostPointer */}, {} /* MapTableEntry */, nullptr /* TargetPointer */}; - } - if (NeedNewEvent) - Entry->setEvent(Event); - // We're done with the entry. Release the entry. - Entry->unlock(); } else { // Release the mapping table lock directly. DataMapMtx.unlock(); @@ -310,11 +315,10 @@ // Note: Entry might be nullptr because of zero length array section. if (Entry != HostDataToTargetListTy::iterator() && !IsHostPtr && !HasPresentModifier) { - Entry->lock(); + HostDataToTargetTy::LockGuard LG(*Entry); void *Event = Entry->getEvent(); if (Event) { int Ret = waitEvent(Event, AsyncInfo); - Entry->unlock(); if (Ret != OFFLOAD_SUCCESS) { // If it fails to wait for the event, we need to return nullptr in // case of any data race. @@ -323,8 +327,6 @@ {} /* MapTableEntry */, nullptr /* TargetPointer */}; } - } else { - Entry->unlock(); } } } diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -573,7 +573,7 @@ } if (UpdateDevPtr) { - Pointer_TPR.MapTableEntry->lock(); + HostDataToTargetTy::LockGuard LG(*Pointer_TPR.MapTableEntry); Device.ShadowMtx.unlock(); DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n", @@ -585,30 +585,12 @@ int Ret = Device.submitData(PointerTgtPtrBegin, &TgtPtrBase, sizeof(void *), AsyncInfo); if (Ret != OFFLOAD_SUCCESS) { - Pointer_TPR.MapTableEntry->unlock(); REPORT("Copying data to device failed.\n"); return OFFLOAD_FAIL; } - void *Event = Pointer_TPR.MapTableEntry->getEvent(); - bool NeedNewEvent = Event == nullptr; - if (NeedNewEvent && Device.createEvent(&Event) != OFFLOAD_SUCCESS) { - Pointer_TPR.MapTableEntry->unlock(); - REPORT("Failed to create event.\n"); + if (Pointer_TPR.MapTableEntry->addEventIfNecessary(Device, AsyncInfo) != + OFFLOAD_SUCCESS) return OFFLOAD_FAIL; - } - // We cannot assume the event should not be nullptr because we don't - // know if the target support event. But if a target doesn't, - // recordEvent should always return success. - Ret = Device.recordEvent(Event, AsyncInfo); - if (Ret != OFFLOAD_SUCCESS) { - Pointer_TPR.MapTableEntry->unlock(); - REPORT("Failed to set dependence on event " DPxMOD "\n", - DPxPTR(Event)); - return OFFLOAD_FAIL; - } - if (NeedNewEvent) - Pointer_TPR.MapTableEntry->setEvent(Event); - Pointer_TPR.MapTableEntry->unlock(); } else Device.ShadowMtx.unlock(); } @@ -827,7 +809,8 @@ } // If the struct is to be deallocated, remove the shadow entry. if (DelEntry) { - DP("Removing shadow pointer " DPxMOD "\n", DPxPTR((void **)Itr->first)); + DP("Removing shadow pointer " DPxMOD "\n", + DPxPTR((void **)Itr->first)); Itr = Device.ShadowPtrMap.erase(Itr); } else { ++Itr; diff --git a/openmp/libomptarget/src/rtl.cpp b/openmp/libomptarget/src/rtl.cpp --- a/openmp/libomptarget/src/rtl.cpp +++ b/openmp/libomptarget/src/rtl.cpp @@ -40,7 +40,20 @@ __attribute__((constructor(101))) void init() { DP("Init target library!\n"); - PM = new PluginManager(); + + bool UseEventsForAtomicTransfers = true; + if (const char *ForceAtomicMap = getenv("LIBOMPTARGET_MAP_FORCE_ATOMIC")) { + std::string ForceAtomicMapStr(ForceAtomicMap); + if (ForceAtomicMapStr == "false" || ForceAtomicMapStr == "FALSE") + UseEventsForAtomicTransfers = false; + else if (ForceAtomicMapStr != "true" && ForceAtomicMapStr != "TRUE") + fprintf(stderr, + "Warning: 'LIBOMPTARGET_MAP_FORCE_ATOMIC' accepts only " + "'true'/'TRUE' or 'false'/'FALSE' as options, '%s' ignored\n", + ForceAtomicMap); + } + + PM = new PluginManager(UseEventsForAtomicTransfers); #ifdef OMPTARGET_PROFILE_ENABLED ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE");