Index: openmp/libomptarget/include/device.h =================================================================== --- openmp/libomptarget/include/device.h +++ openmp/libomptarget/include/device.h @@ -444,6 +444,14 @@ int32_t dataExchange(void *SrcPtr, DeviceTy &DstDev, void *DstPtr, int64_t Size, AsyncInfoTy &AsyncInfo); + /// Notify the plugin about a new mapping starting at the host address + /// \p HstPtr and \p Size bytes. + int32_t notifyDataMapped(void *HstPtr, int64_t Size); + + /// Notify the plugin about an existing mapping being unmapped starting at + /// the host address \p HstPtr. + int32_t notifyDataUnmapped(void *HstPtr); + // Launch the kernel identified by \p TgtEntryPtr with the given arguments. int32_t launchKernel(void *TgtEntryPtr, void **TgtVarsPtr, ptrdiff_t *TgtOffsets, const KernelArgsTy &KernelArgs, Index: openmp/libomptarget/include/omptargetplugin.h =================================================================== --- openmp/libomptarget/include/omptargetplugin.h +++ openmp/libomptarget/include/omptargetplugin.h @@ -209,6 +209,15 @@ // unlock/unpin host memory int32_t __tgt_rtl_data_unlock(int32_t ID, void *HstPtr); +// Notify the plugin about a new mapping starting at the host address \p HstPtr +// and \p Size bytes. The plugin may lock/pin that buffer to achieve optimal +// memory transfers involving that buffer. +int32_t __tgt_rtl_data_notify_mapped(int32_t ID, void *HstPtr, int64_t Size); + +// Notify the plugin about an existing mapping being unmapped, starting at the +// host address \p HstPtr.
+int32_t __tgt_rtl_data_notify_unmapped(int32_t ID, void *HstPtr); + #ifdef __cplusplus } #endif Index: openmp/libomptarget/include/rtl.h =================================================================== --- openmp/libomptarget/include/rtl.h +++ openmp/libomptarget/include/rtl.h @@ -70,6 +70,8 @@ const char **); typedef int32_t(data_lock_ty)(int32_t, void *, int64_t, void **); typedef int32_t(data_unlock_ty)(int32_t, void *); + typedef int32_t(data_notify_mapped_ty)(int32_t, void *, int64_t); + typedef int32_t(data_notify_unmapped_ty)(int32_t, void *); int32_t Idx = -1; // RTL index, index is the number of devices // of other RTLs that were registered before, @@ -120,6 +122,8 @@ release_async_info_ty *release_async_info = nullptr; data_lock_ty *data_lock = nullptr; data_unlock_ty *data_unlock = nullptr; + data_notify_mapped_ty *data_notify_mapped = nullptr; + data_notify_unmapped_ty *data_notify_unmapped = nullptr; // Are there images associated with this RTL. bool IsUsed = false; Index: openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp =================================================================== --- openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp +++ openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp @@ -1845,6 +1845,38 @@ return Plugin::check(Status, "Error in hsa_amd_memory_unlock: %s\n"); } + /// Check through the HSA runtime whether the \p HstPtr buffer is pinned. + Expected isPinnedPtrImpl(void *HstPtr, void *&BaseHstPtr, + void *&BaseDevAccessiblePtr, + size_t &BaseSize) const override { + hsa_amd_pointer_info_t Info; + Info.size = sizeof(hsa_amd_pointer_info_t); + + hsa_status_t Status = + hsa_amd_pointer_info(HstPtr, &Info, /* Allocator */ nullptr, + /* Number of accessible agents (out) */ nullptr, + /* Accessible agents */ nullptr); + if (auto Err = Plugin::check(Status, "Error in hsa_amd_pointer_info: %s")) + return Err; + + // The buffer may be locked or allocated through HSA allocators. 
Assume that + // the buffer is host pinned if the runtime reports an HSA type. + if (Info.type != HSA_EXT_POINTER_TYPE_LOCKED && + Info.type != HSA_EXT_POINTER_TYPE_HSA) + return false; + + assert(Info.hostBaseAddress && "Invalid host pinned address"); + assert(Info.agentBaseAddress && "Invalid agent pinned address"); + assert(Info.sizeInBytes > 0 && "Invalid pinned allocation size"); + + // Save the allocation info in the output parameters. + BaseHstPtr = Info.hostBaseAddress; + BaseDevAccessiblePtr = Info.agentBaseAddress; + BaseSize = Info.sizeInBytes; + + return true; + } + /// Submit data to the device (host to device transfer). Error dataSubmitImpl(void *TgtPtr, const void *HstPtr, int64_t Size, AsyncInfoWrapperTy &AsyncInfoWrapper) override { Index: openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h =================================================================== --- openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h +++ openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h @@ -269,20 +269,26 @@ /// The size of the pinned allocation. size_t Size; + /// Indicate whether the allocation was locked from outside the plugin, for + /// instance, from the application. The externally locked allocations are + /// not unlocked by the plugin when unregistering the last user. + bool ExternallyLocked; + /// The number of references to the pinned allocation. The allocation should /// remain pinned and registered to the map until the number of references /// becomes zero. mutable size_t References; - /// Create an entry with the host and device acessible pointers, and the - /// buffer size. - EntryTy(void *HstPtr, void *DevAccessiblePtr, size_t Size) + /// Create an entry with the host and device accessible pointers, the buffer + /// size, and a boolean indicating whether the buffer was locked externally.
+ EntryTy(void *HstPtr, void *DevAccessiblePtr, size_t Size, bool ExternallyLocked) : HstPtr(HstPtr), DevAccessiblePtr(DevAccessiblePtr), Size(Size), - References(1) {} + ExternallyLocked(ExternallyLocked), References(1) {} /// Utility constructor used for std::set searches. EntryTy(void *HstPtr) - : HstPtr(HstPtr), DevAccessiblePtr(nullptr), Size(0), References(0) {} + : HstPtr(HstPtr), DevAccessiblePtr(nullptr), Size(0), + ExternallyLocked(false), References(0) {} }; /// Comparator of mep entries. Use the host pointer to enforce an order @@ -304,54 +310,89 @@ /// Reference to the corresponding device. GenericDeviceTy &Device; - /// Find an allocation that intersects with \p Buffer pointer. Assume - /// the map's mutex is acquired. - PinnedAllocSetTy::iterator findIntersecting(const void *Buffer) const { + /// Envar that indicates whether mapped host buffers should be locked + /// automatically. + BoolEnvar OMPX_LockMappedBuffers; + + /// Find an allocation that intersects with \p HstPtr pointer. Assume the + /// map's mutex is acquired. + const EntryTy *findIntersecting(const void *HstPtr) const { if (Allocs.empty()) - return Allocs.end(); + return nullptr; // Search the first allocation with starting address that is not less than // the buffer address. - auto It = Allocs.lower_bound({const_cast(Buffer)}); + auto It = Allocs.lower_bound({const_cast(HstPtr)}); // Direct match of starting addresses. - if (It != Allocs.end() && It->HstPtr == Buffer) - return It; + if (It != Allocs.end() && It->HstPtr == HstPtr) + return &(*It); // Not direct match but may be a previous pinned allocation in the map which // contains the buffer. Return false if there is no such a previous // allocation. if (It == Allocs.begin()) - return Allocs.end(); + return nullptr; // Move to the previous pinned allocation. --It; // The buffer is not contained in the pinned allocation.
- if (advanceVoidPtr(It->HstPtr, It->Size) > Buffer) - return It; + if (advanceVoidPtr(It->HstPtr, It->Size) > HstPtr) + return &(*It); // None found. - return Allocs.end(); + return nullptr; + } + + /// Insert an entry to the map representing a locked buffer. The number of references is set to + /// one. + Error insertEntry(void *HstPtr, void *DevAccessiblePtr, size_t Size, bool ExternallyLocked = false); + + /// Erase an existing entry from the map. + Error eraseEntry(const EntryTy &Entry); + + /// Register a new user into an entry that represents a locked buffer. Check + /// also that the registered buffer with \p HstPtr address and \p Size is + /// actually contained into the entry. + Error registerEntryUse(const EntryTy &Entry, void *HstPtr, size_t Size); + + /// Unregister a user from the entry and return whether it is the last user. + /// If it is the last user, the entry will have to be removed from the map + /// and unlock the entry's host buffer (if necessary). + Expected unregisterEntryUse(const EntryTy &Entry); + + /// Indicate whether the first range A fully contains the second range B. + static bool contains(void *BeginA, size_t SizeA, void *BeginB, size_t SizeB) { + void *EndA = advanceVoidPtr(BeginA, SizeA); + void *EndB = advanceVoidPtr(BeginB, SizeB); + return (BeginB >= BeginA && EndB <= EndA); + } + + /// Indicate whether the first range A intersects with the second range B. + static bool intersects(void *BeginA, size_t SizeA, void *BeginB, size_t SizeB) { + void *EndA = advanceVoidPtr(BeginA, SizeA); + void *EndB = advanceVoidPtr(BeginB, SizeB); + return (BeginA <= EndB) and (EndA >= BeginB); } public: /// Create the map of pinned allocations corresponding to a specific device. - PinnedAllocationMapTy(GenericDeviceTy &Device) : Device(Device) {} - - /// Register a host buffer that was recently locked. None of the already - /// registered pinned allocations should intersect with this new one. 
The - /// registration requires the host pointer in \p HstPtr, the pointer that the - /// devices should use when transferring data from/to the allocation in - /// \p DevAccessiblePtr, and the size of the allocation in \p Size. Notice - /// that some plugins may use the same pointer for the \p HstPtr and - /// \p DevAccessiblePtr. The allocation must be unregistered using the + PinnedAllocationMapTy(GenericDeviceTy &Device) : Device(Device), + OMPX_LockMappedBuffers("LIBOMPTARGET_LOCK_MAPPED_HOST_BUFFERS", false) {} + + /// Register a buffer that was recently allocated as a locked host buffer. + /// None of the already registered pinned allocations should intersect with + /// this new one. The registration requires the host pointer in \p HstPtr, + /// the device accessible pointer in \p DevAccessiblePtr, and the size of the + /// allocation in \p Size. The allocation must be unregistered using the /// unregisterHostBuffer function. Error registerHostBuffer(void *HstPtr, void *DevAccessiblePtr, size_t Size); /// Unregister a host pinned allocation passing the host pointer which was /// previously registered using the registerHostBuffer function. When calling - /// this function, the pinned allocation cannot have any other user. + /// this function, the pinned allocation cannot have any other user and will + /// not be unlocked by this function. Error unregisterHostBuffer(void *HstPtr); /// Lock the host buffer at \p HstPtr or register a new user if it intersects @@ -365,6 +406,15 @@ /// pinned allocation is removed from the map and the memory is unlocked. Error unlockHostBuffer(void *HstPtr); + /// Lock or register a host buffer that was recently mapped by libomptarget. + /// This behavior is applied if LIBOMPTARGET_LOCK_MAPPED_HOST_BUFFERS is + /// enabled. Even if not enabled, externally locked buffers are registered + /// in order to optimize their transfers. 
+ Error lockMappedHostBuffer(void *HstPtr, size_t Size); + + /// Unlock or unregister a host buffer that was unmapped by libomptarget. + Error unlockUnmappedHostBuffer(void *HstPtr); + /// Return the device accessible pointer associated to the host pinned /// allocation which the \p HstPtr belongs, if any. Return null in case the /// \p HstPtr does not belong to any host pinned allocation. The device @@ -374,13 +424,12 @@ std::shared_lock Lock(Mutex); // Find the intersecting allocation if any. - auto It = findIntersecting(HstPtr); - if (It == Allocs.end()) + const EntryTy *Entry = findIntersecting(HstPtr); + if (!Entry) return nullptr; - const EntryTy &Entry = *It; - return advanceVoidPtr(Entry.DevAccessiblePtr, - getPtrDiff(HstPtr, Entry.HstPtr)); + return advanceVoidPtr(Entry->DevAccessiblePtr, + getPtrDiff(HstPtr, Entry->HstPtr)); } /// Check whether a buffer belongs to a registered host pinned allocation. @@ -388,7 +437,7 @@ std::shared_lock Lock(Mutex); // Return whether there is an intersecting allocation. - return (findIntersecting(const_cast(HstPtr)) != Allocs.end()); + return (findIntersecting(const_cast(HstPtr)) != nullptr); } }; @@ -457,15 +506,41 @@ return PinnedAllocs.lockHostBuffer(HstPtr, Size); } - virtual Expected dataLockImpl(void *HstPtr, int64_t Size) = 0; - /// Unpin a host memory buffer that was previously pinned. Error dataUnlock(void *HstPtr) { return PinnedAllocs.unlockHostBuffer(HstPtr); } + /// Lock the host buffer \p HstPtr with \p Size bytes with the vendor-specific + /// API and return the device accessible pointer. + virtual Expected dataLockImpl(void *HstPtr, int64_t Size) = 0; + + /// Unlock a previously locked host buffer starting at \p HstPtr. virtual Error dataUnlockImpl(void *HstPtr) = 0; + /// Mark the host buffer with address \p HstPtr and \p Size bytes as a mapped + /// buffer. 
This means that libomptarget created a new mapping of that host + /// buffer (e.g., because of a user OpenMP target map) and the buffer may be used + /// as source/destination of memory transfers. We can use this information to + /// lock the host buffer and optimize its memory transfers. + Error notifyDataMapped(void *HstPtr, int64_t Size) { + return PinnedAllocs.lockMappedHostBuffer(HstPtr, Size); + } + + /// Mark the host buffer with address \p HstPtr as unmapped. This means that + /// libomptarget removed an existing mapping. If the plugin locked the buffer + /// in notifyDataMapped, this function should unlock it. + Error notifyDataUnmapped(void *HstPtr) { + return PinnedAllocs.unlockUnmappedHostBuffer(HstPtr); + } + + /// Check whether the host buffer with address \p HstPtr is pinned by the + /// underlying vendor-specific runtime (if any). Retrieve the host pointer, + /// the device accessible pointer and the size of the original pinned buffer. + virtual Expected isPinnedPtrImpl(void *HstPtr, void *&BaseHstPtr, + void *&BaseDevAccessiblePtr, + size_t &BaseSize) const = 0; + /// Submit data to the device (host to device transfer). Error dataSubmit(void *TgtPtr, const void *HstPtr, int64_t Size, __tgt_async_info *AsyncInfo); Index: openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp =================================================================== --- openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp +++ openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp @@ -582,20 +582,66 @@ return ExecModeGlobal.getValue(); } +Error PinnedAllocationMapTy::insertEntry(void *HstPtr, void *DevAccessiblePtr, size_t Size, bool ExternallyLocked) { + // Insert the new entry into the map.
+ auto Res = Allocs.insert({HstPtr, DevAccessiblePtr, Size, ExternallyLocked}); + if (!Res.second) + return Plugin::error("Cannot insert locked buffer entry"); + + // Check whether the next entry overlaps with the inserted entry. + auto It = std::next(Res.first); + if (It == Allocs.end()) + return Plugin::success(); + + const EntryTy *NextEntry = &(*It); + if (intersects(NextEntry->HstPtr, NextEntry->Size, HstPtr, Size)) + return Plugin::error("Partial overlapping not allowed in locked buffers"); + + return Plugin::success(); +} + +Error PinnedAllocationMapTy::eraseEntry(const EntryTy &Entry) { + // Erase the existing entry. Notice this requires an additional map lookup, + // but this should not be a performance issue. Using iterators would make + // the code more difficult to read. + size_t Erased = Allocs.erase({Entry.HstPtr}); + if (!Erased) + return Plugin::error("Cannot erase locked buffer entry"); + return Plugin::success(); +} + +Error PinnedAllocationMapTy::registerEntryUse(const EntryTy &Entry, void *HstPtr, size_t Size) { + if (!contains(Entry.HstPtr, Entry.Size, HstPtr, Size)) + return Plugin::error("Partial overlapping not allowed in locked buffers"); + + ++Entry.References; + return Plugin::success(); +} + +Expected PinnedAllocationMapTy::unregisterEntryUse(const EntryTy &Entry) { + if (Entry.References == 0) + return Plugin::error("Invalid number of references"); + + // Return whether this was the last user. + return (--Entry.References == 0); +} + Error PinnedAllocationMapTy::registerHostBuffer(void *HstPtr, void *DevAccessiblePtr, size_t Size) { assert(HstPtr && "Invalid pointer"); assert(DevAccessiblePtr && "Invalid pointer"); + assert(Size && "Invalid size"); std::lock_guard Lock(Mutex); // No pinned allocation should intersect. 
- auto Res = Allocs.insert({HstPtr, DevAccessiblePtr, Size}); - if (!Res.second) - return Plugin::error("Cannot register locked buffer"); + const EntryTy *Entry = findIntersecting(HstPtr); + if (Entry) + return Plugin::error("Cannot insert entry due to an existing one"); - return Plugin::success(); + // Now insert the new entry. + return insertEntry(HstPtr, DevAccessiblePtr, Size); } Error PinnedAllocationMapTy::unregisterHostBuffer(void *HstPtr) { @@ -603,62 +649,56 @@ std::lock_guard Lock(Mutex); - // Find the pinned allocation starting at the host pointer address. - auto It = Allocs.find({HstPtr}); - if (It == Allocs.end()) + const EntryTy *Entry = findIntersecting(HstPtr); + if (!Entry) return Plugin::error("Cannot find locked buffer"); - const EntryTy &Entry = *It; + // The address in the entry should be the same we are unregistering. + if (Entry->HstPtr != HstPtr) + return Plugin::error("Unexpected host pointer in locked buffer entry"); + + // Unregister from the entry. + auto LastUseOrErr = unregisterEntryUse(*Entry); + if (!LastUseOrErr) + return LastUseOrErr.takeError(); // There should be no other references to the pinned allocation. - if (Entry.References > 1) + if (!(*LastUseOrErr)) return Plugin::error("The locked buffer is still being used"); - // Remove the entry from the map. - Allocs.erase(It); - - return Plugin::success(); + // Erase the entry from the map. + return eraseEntry(*Entry); } -Expected PinnedAllocationMapTy::lockHostBuffer(void *HstPtr, - size_t Size) { +Expected PinnedAllocationMapTy::lockHostBuffer(void *HstPtr, size_t Size) { assert(HstPtr && "Invalid pointer"); + assert(Size && "Invalid size"); std::lock_guard Lock(Mutex); - auto It = findIntersecting(HstPtr); + const EntryTy *Entry = findIntersecting(HstPtr); - // No intersecting registered allocation found in the map. We must lock and - // register the memory buffer into the map. 
- if (It == Allocs.end()) { - // First, lock the host buffer and retrieve the device accessible pointer. - auto PinnedPtrOrErr = Device.dataLockImpl(HstPtr, Size); - if (!PinnedPtrOrErr) - return PinnedPtrOrErr.takeError(); - - // Then, insert the host buffer entry into the map. - auto Res = Allocs.insert({HstPtr, *PinnedPtrOrErr, Size}); - if (!Res.second) - return Plugin::error("Cannot register locked buffer"); + if (Entry) { + // An already registered intersecting buffer was found. Register a new use. + if (auto Err = registerEntryUse(*Entry, HstPtr, Size)) + return Err; - // Return the device accessible pointer. - return *PinnedPtrOrErr; + // Return the device accessible pointer with the correct offset. + return advanceVoidPtr(Entry->DevAccessiblePtr, getPtrDiff(HstPtr, Entry->HstPtr)); } - const EntryTy &Entry = *It; + // No intersecting registered allocation found in the map. First, lock the + // host buffer and retrieve the device accessible pointer. + auto DevAccessiblePtrOrErr = Device.dataLockImpl(HstPtr, Size); + if (!DevAccessiblePtrOrErr) + return DevAccessiblePtrOrErr.takeError(); -#ifdef OMPTARGET_DEBUG - // Do not allow partial overlapping among host pinned buffers. - if (advanceVoidPtr(HstPtr, Size) > advanceVoidPtr(Entry.HstPtr, Entry.Size)) - return Plugin::error("Partial overlapping not allowed in locked memory"); -#endif - - // Increase the number of references. - Entry.References++; + // Now insert the new entry into the map. + if (auto Err = insertEntry(HstPtr, *DevAccessiblePtrOrErr, Size)) + return Err; - // Return the device accessible pointer after applying the correct offset. - return advanceVoidPtr(Entry.DevAccessiblePtr, - getPtrDiff(HstPtr, Entry.HstPtr)); + // Return the device accessible pointer. 
+ return *DevAccessiblePtrOrErr; } Error PinnedAllocationMapTy::unlockHostBuffer(void *HstPtr) { @@ -666,28 +706,101 @@ std::lock_guard Lock(Mutex); - auto It = findIntersecting(HstPtr); - if (It == Allocs.end()) + const EntryTy *Entry = findIntersecting(HstPtr); + if (!Entry) return Plugin::error("Cannot find locked buffer"); - const EntryTy &Entry = *It; - - // Decrease the number of references. No need to do anything if there are + // Unregister from the locked buffer. No need to do anything if there are // others using the allocation. - if (--Entry.References > 0) + auto LastUseOrErr = unregisterEntryUse(*Entry); + if (!LastUseOrErr) + return LastUseOrErr.takeError(); + + // No need to do anything if there are others using the allocation. + if (!(*LastUseOrErr)) return Plugin::success(); - // This was the last user of the allocation. Unlock the original locked memory - // buffer, which is the host pointer stored in the entry. - if (auto Err = Device.dataUnlockImpl(Entry.HstPtr)) - return Err; + // This was the last user of the allocation. Unlock the original locked buffer + // if it was locked by the plugin. Do not unlock it if it was locked by an + // external entity. Unlock the buffer using the host pointer of the entry. + if (!Entry->ExternallyLocked) + if (auto Err = Device.dataUnlockImpl(Entry->HstPtr)) + return Err; - // Remove the entry from the map. - size_t Erased = Allocs.erase(Entry); - if (!Erased) - return Plugin::error("Cannot find locked buffer"); + // Erase the entry from the map. + return eraseEntry(*Entry); +} - return Plugin::success(); +Error PinnedAllocationMapTy::lockMappedHostBuffer(void *HstPtr, size_t Size) { + assert(HstPtr && "Invalid pointer"); + assert(Size && "Invalid size"); + + std::lock_guard Lock(Mutex); + + // If the buffer was already registered, only register a new user. Otherwise, + // we may need to insert a new entry. 
+ const EntryTy *Entry = findIntersecting(HstPtr); + if (Entry) + return registerEntryUse(*Entry, HstPtr, Size); + + size_t BaseSize; + void *BaseHstPtr, *BaseDevAccessiblePtr; + + // Check whether the buffer is already pinned by the vendor-specific API. + auto IsPinnedOrErr = Device.isPinnedPtrImpl(HstPtr, BaseHstPtr, BaseDevAccessiblePtr, BaseSize); + if (!IsPinnedOrErr) + return IsPinnedOrErr.takeError(); + + // If it is pinned, only insert the entry representing the whole pinned buffer. + if (*IsPinnedOrErr) + return insertEntry(BaseHstPtr, BaseDevAccessiblePtr, BaseSize, /* External */ true); + + // Otherwise, and in case automatic locking of mapped buffers is disabled, do nothing. + if (!OMPX_LockMappedBuffers) + return Plugin::success(); + + // Otherwise, lock the buffer and insert the new entry. + auto DevAccessiblePtrOrErr = Device.dataLockImpl(HstPtr, Size); + if (!DevAccessiblePtrOrErr) + return DevAccessiblePtrOrErr.takeError(); + + return insertEntry(HstPtr, *DevAccessiblePtrOrErr, Size); +} + +Error PinnedAllocationMapTy::unlockUnmappedHostBuffer(void *HstPtr) { + assert(HstPtr && "Invalid pointer"); + + std::lock_guard Lock(Mutex); + + // Check whether there is any intersecting entry. + const EntryTy *Entry = findIntersecting(HstPtr); + + // No entry but automatic locking of mapped buffers is disabled, so + // nothing to do. + if (!Entry && !OMPX_LockMappedBuffers) + return Plugin::success(); + + // No entry, but the automatic locking is enabled, so this is an error. + if (!Entry) + return Plugin::error("Locked buffer not found"); + + // There is an entry, so unregister a user and check whether it was the last one. + auto LastUseOrErr = unregisterEntryUse(*Entry); + if (!LastUseOrErr) + return LastUseOrErr.takeError(); + + // If it is not the last one, there is nothing to do. + if (!(*LastUseOrErr)) + return Plugin::success(); + + // Otherwise, if it was the last and the buffer was locked by the plugin, + // unlock it.
+ if (!Entry->ExternallyLocked) + if (auto Err = Device.dataUnlockImpl(Entry->HstPtr)) + return Err; + + // Finally erase the entry from the map. + return eraseEntry(*Entry); } Error GenericDeviceTy::synchronize(__tgt_async_info *AsyncInfo) { @@ -1116,6 +1229,28 @@ return OFFLOAD_SUCCESS; } +int32_t __tgt_rtl_data_notify_mapped(int32_t DeviceId, void *HstPtr, int64_t Size) { + auto Err = Plugin::get().getDevice(DeviceId).notifyDataMapped(HstPtr, Size); + if (Err) { + REPORT("Failure to notify data mapped %p: %s\n", HstPtr, + toString(std::move(Err)).data()); + return OFFLOAD_FAIL; + } + + return OFFLOAD_SUCCESS; +} + +int32_t __tgt_rtl_data_notify_unmapped(int32_t DeviceId, void *HstPtr) { + auto Err = Plugin::get().getDevice(DeviceId).notifyDataUnmapped(HstPtr); + if (Err) { + REPORT("Failure to notify data unmapped %p: %s\n", HstPtr, + toString(std::move(Err)).data()); + return OFFLOAD_FAIL; + } + + return OFFLOAD_SUCCESS; +} + int32_t __tgt_rtl_data_submit(int32_t DeviceId, void *TgtPtr, void *HstPtr, int64_t Size) { return __tgt_rtl_data_submit_async(DeviceId, TgtPtr, HstPtr, Size, Index: openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp =================================================================== --- openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp +++ openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp @@ -500,6 +500,13 @@ Error dataUnlockImpl(void *HstPtr) override { return Plugin::success(); } + Expected isPinnedPtrImpl(void *HstPtr, void *&BaseHstPtr, + void *&BaseDevAccessiblePtr, + size_t &BaseSize) const override { + // TODO: Implement pinning feature for CUDA. + return false; + } + /// Submit data to the device (host to device transfer). 
Error dataSubmitImpl(void *TgtPtr, const void *HstPtr, int64_t Size, AsyncInfoWrapperTy &AsyncInfoWrapper) override { Index: openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp =================================================================== --- openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp +++ openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp @@ -224,6 +224,13 @@ /// Nothing to do when unlocking the buffer. Error dataUnlockImpl(void *HstPtr) override { return Plugin::success(); } + /// Indicate that the buffer is not pinned. + Expected isPinnedPtrImpl(void *HstPtr, void *&BaseHstPtr, + void *&BaseDevAccessiblePtr, + size_t &BaseSize) const override { + return false; + } + /// Submit data to the device (host to device transfer). Error dataSubmitImpl(void *TgtPtr, const void *HstPtr, int64_t Size, AsyncInfoWrapperTy &AsyncInfoWrapper) override { Index: openmp/libomptarget/src/device.cpp =================================================================== --- openmp/libomptarget/src/device.cpp +++ openmp/libomptarget/src/device.cpp @@ -101,7 +101,8 @@ NewEntry.dynRefCountToStr().c_str(), NewEntry.holdRefCountToStr().c_str()); (void)NewEntry; - return OFFLOAD_SUCCESS; + // Notify the plugin about the new mapping. + return notifyDataMapped(HstPtrBegin, Size); } int DeviceTy::disassociatePtr(void *HstPtrBegin) { @@ -124,7 +125,9 @@ if (Event) destroyEvent(Event); HDTTMap->erase(It); - return OFFLOAD_SUCCESS; + + // Notify the plugin about the unmapped memory. + return notifyDataUnmapped(HstPtrBegin); } else { REPORT("Trying to disassociate a pointer which was not mapped via " "omp_target_associate_ptr\n"); @@ -305,6 +308,12 @@ Entry->dynRefCountToStr().c_str(), Entry->holdRefCountToStr().c_str(), (HstPtrName) ? getNameFromMapping(HstPtrName).c_str() : "unknown"); TargetPointer = (void *)Ptr; + + // Notify the plugin about the new mapping. 
+ if (notifyDataMapped(HstPtrBegin, Size)) + return {{false /* IsNewEntry */, false /* IsHostPointer */}, + nullptr /* Entry */, + nullptr /* TargetPointer */}; } else { // This entry is not present and we did not create a new entry for it. IsPresent = false; @@ -485,6 +494,10 @@ } int Ret = deleteData((void *)Entry->TgtPtrBegin); + + // Notify the plugin about the unmapped memory. + Ret |= notifyDataUnmapped((void *)Entry->HstPtrBegin); + delete Entry; return Ret; @@ -591,6 +604,33 @@ DstPtr, Size, AsyncInfo); } +int32_t DeviceTy::notifyDataMapped(void *HstPtr, int64_t Size) { + if (!RTL->data_notify_mapped) + return OFFLOAD_SUCCESS; + + DP("Notifying about new mapping: HstPtr=" DPxMOD ", Size=" PRId64 "\n", + DPxPTR(HstPtr), Size); + + if (RTL->data_notify_mapped(RTLDeviceID, HstPtr, Size)) { + REPORT("Notifiying about data mapping failed.\n"); + return OFFLOAD_FAIL; + } + return OFFLOAD_SUCCESS; +} + +int32_t DeviceTy::notifyDataUnmapped(void *HstPtr) { + if (!RTL->data_notify_unmapped) + return OFFLOAD_SUCCESS; + + DP("Notifying about an unmapping: HstPtr=" DPxMOD "\n", DPxPTR(HstPtr)); + + if (RTL->data_notify_unmapped(RTLDeviceID, HstPtr)) { + REPORT("Notifiying about data unmapping failed.\n"); + return OFFLOAD_FAIL; + } + return OFFLOAD_SUCCESS; +} + // Run region on device int32_t DeviceTy::launchKernel(void *TgtEntryPtr, void **TgtVarsPtr, ptrdiff_t *TgtOffsets, Index: openmp/libomptarget/src/omptarget.cpp =================================================================== --- openmp/libomptarget/src/omptarget.cpp +++ openmp/libomptarget/src/omptarget.cpp @@ -205,6 +205,10 @@ (uintptr_t)CurrDeviceEntry->addr /*TgtPtrBegin*/, false /*UseHoldRefCount*/, CurrHostEntry->name, true /*IsRefCountINF*/)); + + // Notify about the new mapping. 
+ if (Device.notifyDataMapped(CurrHostEntry->addr, CurrHostEntry->size)) + return OFFLOAD_FAIL; } } } Index: openmp/libomptarget/src/rtl.cpp =================================================================== --- openmp/libomptarget/src/rtl.cpp +++ openmp/libomptarget/src/rtl.cpp @@ -246,6 +246,10 @@ DynLibrary->getAddressOfSymbol("__tgt_rtl_data_lock"); *((void **)&RTL.data_unlock) = DynLibrary->getAddressOfSymbol("__tgt_rtl_data_unlock"); + *((void **)&RTL.data_notify_mapped) = + DynLibrary->getAddressOfSymbol("__tgt_rtl_data_notify_mapped"); + *((void **)&RTL.data_notify_unmapped) = + DynLibrary->getAddressOfSymbol("__tgt_rtl_data_notify_unmapped"); RTL.LibraryHandler = std::move(DynLibrary); Index: openmp/libomptarget/test/lit.cfg =================================================================== --- openmp/libomptarget/test/lit.cfg +++ openmp/libomptarget/test/lit.cfg @@ -25,6 +25,9 @@ if 'LIBOMPTARGET_NEXTGEN_PLUGINS' in os.environ: config.environment['LIBOMPTARGET_NEXTGEN_PLUGINS'] = os.environ['LIBOMPTARGET_NEXTGEN_PLUGINS'] +if 'LIBOMPTARGET_LOCK_MAPPED_HOST_BUFFERS' in os.environ: + config.environment['LIBOMPTARGET_LOCK_MAPPED_HOST_BUFFERS'] = os.environ['LIBOMPTARGET_LOCK_MAPPED_HOST_BUFFERS'] + if 'OMP_TARGET_OFFLOAD' in os.environ: config.environment['OMP_TARGET_OFFLOAD'] = os.environ['OMP_TARGET_OFFLOAD']