diff --git a/openmp/docs/design/Runtimes.rst b/openmp/docs/design/Runtimes.rst --- a/openmp/docs/design/Runtimes.rst +++ b/openmp/docs/design/Runtimes.rst @@ -683,6 +683,7 @@ * ``LIBOMPTARGET_HEAP_SIZE=`` * ``LIBOMPTARGET_STACK_SIZE=`` * ``LIBOMPTARGET_SHARED_MEMORY_SIZE=`` + * ``LIBOMPTARGET_MAP_FORCE_ATOMIC=[no, off, 0] (default on)`` LIBOMPTARGET_DEBUG """""""""""""""""" @@ -1003,6 +1004,23 @@ Offloading + +LIBOMPTARGET_MAP_FORCE_ATOMIC +""""""""""""""""""""""""""""" + +The OpenMP standard guarantees that map clauses are atomic. However, the this +can have a drastic performance impact. Users that do not require atomic map +clauses can disable them to potentially recover lost performance. As a +consequence, users have to guarantee themselves that no two map clauses will +concurrently map the same memory. If the memory is already mapped and the +map clauses will only modify the reference counter from a non-zero count to +another non-zero count, concurrent map clauses are supported regardless of +this option. To disable forced atomic map clauses use "no", "off", or "0" +as the value of the ``LIBOMPTARGET_MAP_FORCE_ATOMIC`` environment variable. +The default behavior of LLVM 14 is to force atomic maps clauses, prior +versions of LLVM did not. + + LLVM/OpenMP Target Host Runtime Plugins (``libomptarget.rtl.XXXX``) ------------------------------------------------------------------- diff --git a/openmp/libomptarget/include/device.h b/openmp/libomptarget/include/device.h --- a/openmp/libomptarget/include/device.h +++ b/openmp/libomptarget/include/device.h @@ -132,6 +132,11 @@ /// Get the event bound to this data map. void *getEvent() const { return States->Event; } + /// Add a new event, if necessary, and unlock the entry. + /// Returns OFFLOAD_FAIL if something went wrong, OFFLOAD_SUCCESS otherwise. + int addEventIfNecessaryAndUnlock(DeviceTy &Device, + AsyncInfoTy &AsyncInfo) const; + /// Set the event bound to this data map. void setEvent(void *Event) const { States->Event = Event; } @@ -415,6 +420,9 @@ /// Struct for the data required to handle plugins struct PluginManager { + PluginManager(bool UseEventsForAtomicTransfers) + : UseEventsForAtomicTransfers(UseEventsForAtomicTransfers) {} + /// RTLs identified on the host RTLsTy RTLs; @@ -435,6 +443,10 @@ // Store target policy (disabled, mandatory, default) kmp_target_offload_kind_t TargetOffloadPolicy = tgt_default; std::mutex TargetOffloadMtx; ///< For TargetOffloadPolicy + + /// Flag to indicate if we use events to ensure the atomicity of + /// map clauses or not. Can be modified with an environment variable. + const bool UseEventsForAtomicTransfers; }; extern PluginManager *PM; diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp --- a/openmp/libomptarget/src/device.cpp +++ b/openmp/libomptarget/src/device.cpp @@ -20,6 +20,36 @@ #include #include +int HostDataToTargetTy::addEventIfNecessaryAndUnlock( + DeviceTy &Device, AsyncInfoTy &AsyncInfo) const { + // First, check if the user disabled atomic map transfer/malloc/dealloc. + if (!PM->UseEventsForAtomicTransfers) + return OFFLOAD_SUCCESS; + + void *Event = getEvent(); + bool NeedNewEvent = Event == nullptr; + if (NeedNewEvent && Device.createEvent(&Event) != OFFLOAD_SUCCESS) { + REPORT("Failed to create event\n"); + unlock(); + return OFFLOAD_FAIL; + } + + // We cannot assume the event should not be nullptr because we don't + // know if the target support event. But if a target doesn't, + // recordEvent should always return success. + if (Device.recordEvent(Event, AsyncInfo) != OFFLOAD_SUCCESS) { + REPORT("Failed to set dependence on event " DPxMOD "\n", DPxPTR(Event)); + unlock(); + return OFFLOAD_FAIL; + } + + if (NeedNewEvent) + setEvent(Event); + + unlock(); + return OFFLOAD_SUCCESS; +} + DeviceTy::DeviceTy(RTLInfoTy *RTL) : DeviceID(-1), RTL(RTL), RTLDeviceID(-1), IsInit(false), InitFlag(), HasPendingGlobals(false), HostDataToTargetMap(), PendingCtorsDtors(), @@ -278,30 +308,11 @@ TargetPointer = nullptr; } - void *Event = Entry->getEvent(); - bool NeedNewEvent = Event == nullptr; - if (NeedNewEvent && createEvent(&Event) != OFFLOAD_SUCCESS) { - Entry->unlock(); - REPORT("Failed to create event\n"); - return {{false /* IsNewEntry */, false /* IsHostPointer */}, - {} /* MapTableEntry */, - nullptr /* TargetPointer */}; - } - // We cannot assume the event should not be nullptr because we don't - // know if the target support event. But if a target doesn't, - // recordEvent should always return success. - Ret = recordEvent(Event, AsyncInfo); - if (Ret != OFFLOAD_SUCCESS) { - Entry->unlock(); - REPORT("Failed to set dependence on event " DPxMOD "\n", DPxPTR(Event)); + if (Entry->addEventIfNecessaryAndUnlock(*this, AsyncInfo) != + OFFLOAD_SUCCESS) return {{false /* IsNewEntry */, false /* IsHostPointer */}, {} /* MapTableEntry */, nullptr /* TargetPointer */}; - } - if (NeedNewEvent) - Entry->setEvent(Event); - // We're done with the entry. Release the entry. - Entry->unlock(); } else { // Release the mapping table lock directly. DataMapMtx.unlock(); diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -589,26 +589,9 @@ REPORT("Copying data to device failed.\n"); return OFFLOAD_FAIL; } - void *Event = Pointer_TPR.MapTableEntry->getEvent(); - bool NeedNewEvent = Event == nullptr; - if (NeedNewEvent && Device.createEvent(&Event) != OFFLOAD_SUCCESS) { - Pointer_TPR.MapTableEntry->unlock(); - REPORT("Failed to create event.\n"); - return OFFLOAD_FAIL; - } - // We cannot assume the event should not be nullptr because we don't - // know if the target support event. But if a target doesn't, - // recordEvent should always return success. - Ret = Device.recordEvent(Event, AsyncInfo); - if (Ret != OFFLOAD_SUCCESS) { - Pointer_TPR.MapTableEntry->unlock(); - REPORT("Failed to set dependence on event " DPxMOD "\n", - DPxPTR(Event)); + if (Pointer_TPR.MapTableEntry->addEventIfNecessaryAndUnlock( + Device, AsyncInfo) != OFFLOAD_SUCCESS) return OFFLOAD_FAIL; - } - if (NeedNewEvent) - Pointer_TPR.MapTableEntry->setEvent(Event); - Pointer_TPR.MapTableEntry->unlock(); } else Device.ShadowMtx.unlock(); } @@ -827,7 +810,8 @@ } // If the struct is to be deallocated, remove the shadow entry. if (DelEntry) { - DP("Removing shadow pointer " DPxMOD "\n", DPxPTR((void **)Itr->first)); + DP("Removing shadow pointer " DPxMOD "\n", + DPxPTR((void **)Itr->first)); Itr = Device.ShadowPtrMap.erase(Itr); } else { ++Itr; diff --git a/openmp/libomptarget/src/rtl.cpp b/openmp/libomptarget/src/rtl.cpp --- a/openmp/libomptarget/src/rtl.cpp +++ b/openmp/libomptarget/src/rtl.cpp @@ -40,7 +40,16 @@ __attribute__((constructor(101))) void init() { DP("Init target library!\n"); - PM = new PluginManager(); + + bool UseEventsForAtomicTransfers = true; + if (const char *ForceAtomicMap = getenv("LIBOMPTARGET_MAP_FORCE_ATOMIC")) { + std::string ForceAtomicMapStr(ForceAtomicMap); + if (ForceAtomicMapStr == "no" || ForceAtomicMapStr == "off" || + ForceAtomicMapStr == "0") + UseEventsForAtomicTransfers = false; + } + + PM = new PluginManager(UseEventsForAtomicTransfers); #ifdef OMPTARGET_PROFILE_ENABLED ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE");