Index: openmp/libomptarget/include/device.h =================================================================== --- openmp/libomptarget/include/device.h +++ openmp/libomptarget/include/device.h @@ -444,6 +444,14 @@ int32_t dataExchange(void *SrcPtr, DeviceTy &DstDev, void *DstPtr, int64_t Size, AsyncInfoTy &AsyncInfo); + /// Notify the plugins about a new mapping starting at the host address + /// \p HstPtr and \p Size bytes. + int32_t dataMapped(void *HstPtr, int64_t Size); + + /// Notify the plugins about an existing mapping being unmapped starting at + /// the host address \p HstPtr. + int32_t dataUnmapped(void *HstPtr); + // Launch the kernel identified by \p TgtEntryPtr with the given arguments. int32_t launchKernel(void *TgtEntryPtr, void **TgtVarsPtr, ptrdiff_t *TgtOffsets, const KernelArgsTy &KernelArgs, Index: openmp/libomptarget/include/omptargetplugin.h =================================================================== --- openmp/libomptarget/include/omptargetplugin.h +++ openmp/libomptarget/include/omptargetplugin.h @@ -209,6 +209,15 @@ // unlock/unpin host memory int32_t __tgt_rtl_data_unlock(int32_t ID, void *HstPtr); +// Notify the plugin about a new mapping starting at the host address \p HstPtr +// and \p Size bytes. The plugin may lock/pin that buffer to achieve optimal +// memory transfers involving that buffer. +int32_t __tgt_rtl_data_mapped(int32_t ID, void *HstPtr, int64_t Size); + +// Notify the plugin about an existing mapping being unmapped, starting at the +// host address \p HstPtr. 
+int32_t __tgt_rtl_data_unmapped(int32_t ID, void *HstPtr); + #ifdef __cplusplus } #endif Index: openmp/libomptarget/include/rtl.h =================================================================== --- openmp/libomptarget/include/rtl.h +++ openmp/libomptarget/include/rtl.h @@ -70,6 +70,8 @@ const char **); typedef int32_t(data_lock_ty)(int32_t, void *, int64_t, void **); typedef int32_t(data_unlock_ty)(int32_t, void *); + typedef int32_t(data_mapped_ty)(int32_t, void *, int64_t); + typedef int32_t(data_unmapped_ty)(int32_t, void *); int32_t Idx = -1; // RTL index, index is the number of devices // of other RTLs that were registered before, @@ -120,6 +122,8 @@ release_async_info_ty *release_async_info = nullptr; data_lock_ty *data_lock = nullptr; data_unlock_ty *data_unlock = nullptr; + data_mapped_ty *data_mapped = nullptr; + data_unmapped_ty *data_unmapped = nullptr; // Are there images associated with this RTL. bool IsUsed = false; Index: openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp =================================================================== --- openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp +++ openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp @@ -1845,6 +1845,34 @@ return Plugin::check(Status, "Error in hsa_amd_memory_unlock: %s\n"); } + Expected<bool> isPinnedPtrImpl(void *HstPtr, void *&HstAllocPtr, + void *&DevAccessibleAllocPtr, + size_t &AllocSize) const override { + hsa_amd_pointer_info_t Info; + Info.size = sizeof(hsa_amd_pointer_info_t); + hsa_status_t Status = + hsa_amd_pointer_info(HstPtr, &Info, /* Allocator */ nullptr, + /* Number of accessible agents */ nullptr, + /* Accessible agents */ nullptr); + + if (auto Err = Plugin::check(Status, "Error in hsa_amd_pointer_info: %s")) + return Err; + + if (Info.type != HSA_EXT_POINTER_TYPE_LOCKED && + Info.type != HSA_EXT_POINTER_TYPE_HSA) + return false; + + assert(Info.hostBaseAddress && "Invalid host pinned address"); + assert(Info.agentBaseAddress && "Invalid agent 
pinned address"); + assert(Info.sizeInBytes > 0 && "Invalid pinned allocation size"); + + HstAllocPtr = Info.hostBaseAddress; + DevAccessibleAllocPtr = Info.agentBaseAddress; + AllocSize = Info.sizeInBytes; + + return true; + } + /// Submit data to the device (host to device transfer). Error dataSubmitImpl(void *TgtPtr, const void *HstPtr, int64_t Size, AsyncInfoWrapperTy &AsyncInfoWrapper) override { Index: openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h =================================================================== --- openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h +++ openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h @@ -269,20 +269,27 @@ /// The size of the pinned allocation. size_t Size; + /// Indicate whether the allocation was pinned from outside the plugin, for + /// instance, from the application. The externally pinned allocations are + /// not unlocked when unregistering the last user. + bool ExternallyPinned; + /// The number of references to the pinned allocation. The allocation should /// remain pinned and registered to the map until the number of references /// becomes zero. mutable size_t References; - /// Create an entry with the host and device acessible pointers, and the - /// buffer size. - EntryTy(void *HstPtr, void *DevAccessiblePtr, size_t Size) + /// Create an entry with the host and device accessible pointers, the buffer + /// size, and a boolean indicating whether the buffer was locked externally. + EntryTy(void *HstPtr, void *DevAccessiblePtr, size_t Size, + bool ExternallyPinned = false) : HstPtr(HstPtr), DevAccessiblePtr(DevAccessiblePtr), Size(Size), - References(1) {} + ExternallyPinned(ExternallyPinned), References(1) {} /// Utility constructor used for std::set searches. 
EntryTy(void *HstPtr) - : HstPtr(HstPtr), DevAccessiblePtr(nullptr), Size(0), References(0) {} + : HstPtr(HstPtr), DevAccessiblePtr(nullptr), Size(0), + ExternallyPinned(false), References(0) {} }; /// Comparator of mep entries. Use the host pointer to enforce an order @@ -358,7 +365,10 @@ /// with an already existing one. A partial overlapping with extension is not /// allowed. The function returns the device accessible pointer of the pinned /// buffer. The buffer must be unlocked using the unlockHostBuffer function. - Expected<void *> lockHostBuffer(void *HstPtr, size_t Size); + /// The last parameter indicates whether the buffer is being locked because of + /// a new mapping or due to an explicit lock operation. + Expected<void *> lockHostBuffer(void *HstPtr, size_t Size, + bool FromMapping = false); /// Unlock the host buffer at \p HstPtr or unregister a user if other users /// are still using the pinned allocation. If this was the last user, the @@ -466,6 +476,18 @@ virtual Error dataUnlockImpl(void *HstPtr) = 0; + /// Pin the buffer \p HstPtr with \p Size because a new mapping was created. + Error dataMapped(void *HstPtr, int64_t Size); + + /// Unpin the \p HstPtr buffer because the mapping was removed. + Error dataUnmapped(void *HstPtr) { + return PinnedAllocs.unlockHostBuffer(HstPtr); + } + + virtual Expected<bool> isPinnedPtrImpl(void *HstPtr, void *&HstAllocPtr, + void *&DevAccessibleAllocPtr, + size_t &AllocSize) const = 0; + /// Submit data to the device (host to device transfer). 
Error dataSubmit(void *TgtPtr, const void *HstPtr, int64_t Size, __tgt_async_info *AsyncInfo); Index: openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp =================================================================== --- openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp +++ openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp @@ -621,7 +621,8 @@ } Expected<void *> PinnedAllocationMapTy::lockHostBuffer(void *HstPtr, - size_t Size) { + size_t Size, + bool FromMapping) { assert(HstPtr && "Invalid pointer"); std::lock_guard Lock(Mutex); @@ -631,6 +632,31 @@ // No intersecting registered allocation found in the map. We must lock and // register the memory buffer into the map. if (It == Allocs.end()) { + // When locking due to a new mapping, check whether the buffer is already + // locked externally, e.g., by the application. + if (FromMapping) { + size_t AllocSize; + void *HstAllocPtr, *DevAccessibleAllocPtr; + + // Check whether the host pointer has been externally allocated. + auto IsPinnedOrErr = Device.isPinnedPtrImpl( + HstPtr, HstAllocPtr, DevAccessibleAllocPtr, AllocSize); + if (!IsPinnedOrErr) + return IsPinnedOrErr.takeError(); + + // Only insert the original allocation into the map if it is already + // pinned. + if (*IsPinnedOrErr) { + auto Res = Allocs.insert({HstAllocPtr, DevAccessibleAllocPtr, AllocSize, + /* Externally locked */ true}); + if (!Res.second) + return Plugin::error("Cannot register locked buffer"); + + return advanceVoidPtr(DevAccessibleAllocPtr, + getPtrDiff(HstPtr, HstAllocPtr)); + } + } + // First, lock the host buffer and retrieve the device accessible pointer. auto PinnedPtrOrErr = Device.dataLockImpl(HstPtr, Size); if (!PinnedPtrOrErr) @@ -677,10 +703,12 @@ if (--Entry.References > 0) return Plugin::success(); - // This was the last user of the allocation. Unlock the original locked memory - // buffer, which is the host pointer stored in the entry. 
- if (auto Err = Device.dataUnlockImpl(Entry.HstPtr)) - return Err; + // This was the last user of the allocation. Unlock the original locked buffer + // if it was locked by the plugin. Do not unlock it if it was locked by an + // external entity. Unlock the buffer using the host pointer of the entry. + if (!Entry.ExternallyPinned) + if (auto Err = Device.dataUnlockImpl(Entry.HstPtr)) + return Err; // Remove the entry from the map. size_t Erased = Allocs.erase(Entry); @@ -764,6 +792,15 @@ return Plugin::success(); } +Error GenericDeviceTy::dataMapped(void *HstPtr, int64_t Size) { + auto PinnedPtrOrErr = PinnedAllocs.lockHostBuffer( + HstPtr, Size, /* check whether already locked */ true); + if (!PinnedPtrOrErr) + return PinnedPtrOrErr.takeError(); + + return Plugin::success(); +} + Error GenericDeviceTy::dataSubmit(void *TgtPtr, const void *HstPtr, int64_t Size, __tgt_async_info *AsyncInfo) { auto Err = Plugin::success(); @@ -1116,6 +1153,28 @@ return OFFLOAD_SUCCESS; } +int32_t __tgt_rtl_data_mapped(int32_t DeviceId, void *HstPtr, int64_t Size) { + auto Err = Plugin::get().getDevice(DeviceId).dataMapped(HstPtr, Size); + if (Err) { + REPORT("Failure to register mapped memory %p: %s\n", HstPtr, + toString(std::move(Err)).data()); + return OFFLOAD_FAIL; + } + + return OFFLOAD_SUCCESS; +} + +int32_t __tgt_rtl_data_unmapped(int32_t DeviceId, void *HstPtr) { + auto Err = Plugin::get().getDevice(DeviceId).dataUnmapped(HstPtr); + if (Err) { + REPORT("Failure to unregister unmapped memory %p: %s\n", HstPtr, + toString(std::move(Err)).data()); + return OFFLOAD_FAIL; + } + + return OFFLOAD_SUCCESS; +} + int32_t __tgt_rtl_data_submit(int32_t DeviceId, void *TgtPtr, void *HstPtr, int64_t Size) { return __tgt_rtl_data_submit_async(DeviceId, TgtPtr, HstPtr, Size, Index: openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp =================================================================== --- openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp +++ 
openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp @@ -500,6 +500,12 @@ Error dataUnlockImpl(void *HstPtr) override { return Plugin::success(); } + Expected<bool> isPinnedPtrImpl(void *HstPtr, void *&HstAllocPtr, + void *&DevAccessibleAllocPtr, + size_t &AllocSize) const override { + return false; + } + /// Submit data to the device (host to device transfer). Error dataSubmitImpl(void *TgtPtr, const void *HstPtr, int64_t Size, AsyncInfoWrapperTy &AsyncInfoWrapper) override { Index: openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp =================================================================== --- openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp +++ openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp @@ -224,6 +224,12 @@ /// Nothing to do when unlocking the buffer. Error dataUnlockImpl(void *HstPtr) override { return Plugin::success(); } + Expected<bool> isPinnedPtrImpl(void *HstPtr, void *&HstAllocPtr, + void *&DevAccessibleAllocPtr, + size_t &AllocSize) const override { + return false; + } + /// Submit data to the device (host to device transfer). Error dataSubmitImpl(void *TgtPtr, const void *HstPtr, int64_t Size, AsyncInfoWrapperTy &AsyncInfoWrapper) override { Index: openmp/libomptarget/src/device.cpp =================================================================== --- openmp/libomptarget/src/device.cpp +++ openmp/libomptarget/src/device.cpp @@ -101,7 +101,8 @@ NewEntry.dynRefCountToStr().c_str(), NewEntry.holdRefCountToStr().c_str()); (void)NewEntry; - return OFFLOAD_SUCCESS; + // Notify about the new mapping. + return dataMapped(HstPtrBegin, Size); } int DeviceTy::disassociatePtr(void *HstPtrBegin) { @@ -124,7 +125,9 @@ if (Event) destroyEvent(Event); HDTTMap->erase(It); - return OFFLOAD_SUCCESS; + + // Notify about the unmapped memory. 
+ return dataUnmapped(HstPtrBegin); } else { REPORT("Trying to disassociate a pointer which was not mapped via " "omp_target_associate_ptr\n"); @@ -305,6 +308,12 @@ Entry->dynRefCountToStr().c_str(), Entry->holdRefCountToStr().c_str(), (HstPtrName) ? getNameFromMapping(HstPtrName).c_str() : "unknown"); TargetPointer = (void *)Ptr; + + // Notify about the new mapping. + if (dataMapped(HstPtrBegin, Size)) + return {{false /* IsNewEntry */, false /* IsHostPointer */}, + nullptr /* Entry */, + nullptr /* TargetPointer */}; } else { // This entry is not present and we did not create a new entry for it. IsPresent = false; @@ -485,6 +494,10 @@ } int Ret = deleteData((void *)Entry->TgtPtrBegin); + + // Notify about the unmapped memory. + Ret |= dataUnmapped((void *)Entry->HstPtrBegin); + delete Entry; return Ret; @@ -591,6 +604,33 @@ DstPtr, Size, AsyncInfo); } +int32_t DeviceTy::dataMapped(void *HstPtr, int64_t Size) { + if (!RTL->data_mapped) + return OFFLOAD_SUCCESS; + + DP("Notifying about new mapping: HstPtr=" DPxMOD ", Size=%" PRId64 "\n", + DPxPTR(HstPtr), Size); + + if (RTL->data_mapped(RTLDeviceID, HstPtr, Size)) { + REPORT("Notifying about data mapping failed.\n"); + return OFFLOAD_FAIL; + } + return OFFLOAD_SUCCESS; +} + +int32_t DeviceTy::dataUnmapped(void *HstPtr) { + if (!RTL->data_unmapped) + return OFFLOAD_SUCCESS; + + DP("Notifying about an unmapping: HstPtr=" DPxMOD "\n", DPxPTR(HstPtr)); + + if (RTL->data_unmapped(RTLDeviceID, HstPtr)) { + REPORT("Notifying about data unmapping failed.\n"); + return OFFLOAD_FAIL; + } + return OFFLOAD_SUCCESS; +} + // Run region on device int32_t DeviceTy::launchKernel(void *TgtEntryPtr, void **TgtVarsPtr, ptrdiff_t *TgtOffsets, Index: openmp/libomptarget/src/omptarget.cpp =================================================================== --- openmp/libomptarget/src/omptarget.cpp +++ openmp/libomptarget/src/omptarget.cpp @@ -205,6 +205,10 @@ (uintptr_t)CurrDeviceEntry->addr /*TgtPtrBegin*/, false /*UseHoldRefCount*/, 
CurrHostEntry->name, true /*IsRefCountINF*/)); + + // Notify about the new mapping. + if (Device.dataMapped(CurrHostEntry->addr, CurrHostEntry->size)) + return OFFLOAD_FAIL; } } } Index: openmp/libomptarget/src/rtl.cpp =================================================================== --- openmp/libomptarget/src/rtl.cpp +++ openmp/libomptarget/src/rtl.cpp @@ -246,6 +246,10 @@ DynLibrary->getAddressOfSymbol("__tgt_rtl_data_lock"); *((void **)&RTL.data_unlock) = DynLibrary->getAddressOfSymbol("__tgt_rtl_data_unlock"); + *((void **)&RTL.data_mapped) = + DynLibrary->getAddressOfSymbol("__tgt_rtl_data_mapped"); + *((void **)&RTL.data_unmapped) = + DynLibrary->getAddressOfSymbol("__tgt_rtl_data_unmapped"); RTL.LibraryHandler = std::move(DynLibrary);