diff --git a/openmp/libomptarget/src/CMakeLists.txt b/openmp/libomptarget/src/CMakeLists.txt
--- a/openmp/libomptarget/src/CMakeLists.txt
+++ b/openmp/libomptarget/src/CMakeLists.txt
@@ -1,9 +1,9 @@
 ##===----------------------------------------------------------------------===##
-# 
+#
 # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 # See https://llvm.org/LICENSE.txt for license information.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-# 
+#
 ##===----------------------------------------------------------------------===##
 #
 # Build offloading library libomptarget.so.
@@ -16,6 +16,7 @@
   api.cpp
   device.cpp
   interface.cpp
+  memory.cpp
   rtl.cpp
   omptarget.cpp
 )
diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp
--- a/openmp/libomptarget/src/device.cpp
+++ b/openmp/libomptarget/src/device.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "device.h"
+#include "memory.h"
 #include "private.h"
 #include "rtl.h"
 
@@ -199,7 +200,8 @@
   } else {
     // If it is not contained and Size > 0 we should create a new entry for it.
     IsNew = true;
-    uintptr_t tp = (uintptr_t)RTL->data_alloc(RTLDeviceID, Size, HstPtrBegin);
+    uintptr_t tp =
+        (uintptr_t)MemoryManager.Allocate(Size, HstPtrBegin, DeviceID);
     DP("Creating new map entry: HstBase=" DPxMOD ", HstBegin=" DPxMOD ", "
         "HstEnd=" DPxMOD ", TgtBegin=" DPxMOD "\n", DPxPTR(HstPtrBase),
         DPxPTR(HstPtrBegin), DPxPTR((uintptr_t)HstPtrBegin + Size), DPxPTR(tp));
@@ -280,7 +282,7 @@
     if (HT.decRefCount() == 0) {
       DP("Deleting tgt data " DPxMOD " of size %ld\n",
           DPxPTR(HT.TgtPtrBegin), Size);
-      RTL->data_delete(RTLDeviceID, (void *)HT.TgtPtrBegin);
+      MemoryManager.Free((void *)HT.TgtPtrBegin, DeviceID);
       DP("Removing%s mapping with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD
           ", Size=%ld\n", (ForceDelete ? " (forced)" : ""),
           DPxPTR(HT.HstPtrBegin), DPxPTR(HT.TgtPtrBegin), Size);
diff --git a/openmp/libomptarget/src/memory.h b/openmp/libomptarget/src/memory.h
new file mode 100644
--- /dev/null
+++ b/openmp/libomptarget/src/memory.h
@@ -0,0 +1,38 @@
+//===----------- memory.h - Target independent memory manager -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Declarations for target independent memory manager.
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include <cstddef>
+
+// Forward declaration
+struct DeviceTy;
+
+class MemoryManagerTy {
+public:
+  // Allocate Size bytes on device DeviceId. HstPtr is forwarded to the device
+  // allocation routine. Returns nullptr if no memory could be obtained.
+  void *Allocate(size_t Size, void *HstPtr, int DeviceId);
+
+  // Give Ptr back to the manager of device DeviceId, which keeps the buffer
+  // for reuse. Returns OFFLOAD_FAIL if Ptr was not returned by Allocate.
+  int Free(void *Ptr, int DeviceId);
+
+  // Create a manager for D. Managers are indexed in creation order, and that
+  // index is the DeviceId expected by Allocate and Free.
+  void Init(DeviceTy &D);
+
+  // The number of devices it manages
+  size_t NumOfDevices();
+};
+
+extern MemoryManagerTy MemoryManager;
diff --git a/openmp/libomptarget/src/memory.cpp b/openmp/libomptarget/src/memory.cpp
new file mode 100644
--- /dev/null
+++ b/openmp/libomptarget/src/memory.cpp
@@ -0,0 +1,268 @@
+//===----------- memory.cpp - Target independent memory manager -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Functionality for managing target memory.
+//
+//===----------------------------------------------------------------------===//
+
+#include <algorithm>
+#include <cassert>
+#include <climits>
+#include <cstddef>
+#include <list>
+#include <memory>
+#include <mutex>
+#include <unordered_map>
+#include <vector>
+
+#include "device.h"
+#include "memory.h"
+#include "rtl.h"
+
+namespace impl {
+// Bucket thresholds in ascending order. FindBucket() maps a size S to the
+// bucket I with BucketSize[I] <= S < BucketSize[I + 1]; the last bucket has no
+// upper bound.
+constexpr const size_t BucketSize[] = {
+    0,        1U << 20, 1U << 21, 1U << 22, 1U << 23, 1U << 24, 1U << 25,
+    1U << 26, 1U << 27, 1U << 28, 1U << 29, 1U << 30, 1U << 31};
+
+constexpr const size_t NumBuckets = sizeof(BucketSize) / sizeof(size_t);
+
+// Find the largest power of 2 that is not greater than Num (0 if Num is 0)
+inline size_t Flp2(size_t Num) {
+  // Smear the highest set bit into every lower position. The bound is derived
+  // from the width of size_t so that no shift count reaches the type width.
+  for (size_t Shift = 1; Shift < sizeof(size_t) * CHAR_BIT; Shift <<= 1)
+    Num |= Num >> Shift;
+  // Keep only the highest set bit. Unlike (Num + 1) >> 1 this does not wrap
+  // around when the top bit of Num is set.
+  return Num - (Num >> 1);
+}
+
+// Find a suitable bucket: the last one whose threshold is not greater than
+// the power-of-2 floor of Size
+inline int FindBucket(size_t Size) {
+  const size_t F = Flp2(Size);
+  const size_t *const First = BucketSize;
+  const size_t *const Last = BucketSize + NumBuckets;
+  // BucketSize[0] is 0, so upper_bound never returns First and B is never
+  // negative.
+  const int B = static_cast<int>(std::upper_bound(First, Last, F) - First) - 1;
+
+  assert(B >= 0 && static_cast<size_t>(B) < NumBuckets && "B is out of range");
+
+  return B;
+}
+
+struct Node {
+  // If Size is zero, this node has not been connected with a target memory
+  size_t Size;
+  void *Ptr;
+  Node(size_t S, void *P) : Size(S), Ptr(P) {}
+};
+
+class MemoryManagerTy {
+  // Free buffers per bucket. The entries point into NodeList.
+  std::list<Node *> FreeList[NumBuckets];
+  // Owns every node. Nodes are only relinked with splice(), never erased, so
+  // the Node pointers kept in FreeList and PtrToNodeTable stay valid.
+  std::list<Node> NodeList;
+  // Maps a device pointer that is currently handed out to its node
+  std::unordered_map<void *, Node *> PtrToNodeTable;
+  std::mutex FreeListLocks[NumBuckets];
+  std::mutex MapTableLock;
+  std::mutex NodeListLock;
+  // NOTE(review): this is the address of the DeviceTy passed to Init(). It
+  // dangles if the container holding that object reallocates afterwards;
+  // confirm against the caller.
+  DeviceTy *Device;
+
+  void *allocateFromDevice(size_t Size, void *HstPtr) {
+    return Device->RTL->data_alloc(Device->RTLDeviceID, Size, HstPtr);
+  }
+
+  int deleteFromDevice(void *Ptr) {
+    return Device->RTL->data_delete(Device->RTLDeviceID, Ptr);
+  }
+
+  // This function is called when it tries to allocate memory on device but the
+  // device returns out of memory. It will first free one memory from the last
+  // bucket (because its buffers are large then chances are that we just need to
+  // free one or two) and try to allocate again until either we can get memory
+  // or every buffer we hold has been freed.
+  void *freeAndAllocate(size_t Size, void *HstPtr) {
+    // NumBuckets - 1 is the last valid index of FreeList and FreeListLocks
+    for (int I = static_cast<int>(NumBuckets) - 1; I >= 0; --I) {
+      std::list<Node *> &List = FreeList[I];
+      std::mutex &Lock = FreeListLocks[I];
+      do {
+        Node *P = nullptr;
+        // Fetch one node from the list
+        {
+          std::lock_guard<std::mutex> G(Lock);
+          // We have drained this bucket. Move to the next one.
+          if (List.empty())
+            break;
+          P = List.front();
+          List.pop_front();
+        }
+
+        // Call device routine to free the buffer
+        int Ret = deleteFromDevice(P->Ptr);
+        // Cannot free memory on device
+        // TODO: Maybe should raise an exception?
+        if (Ret != OFFLOAD_SUCCESS)
+          return nullptr;
+
+        // Move the node to the front of NodeList for later reuse
+        {
+          std::lock_guard<std::mutex> G(NodeListLock);
+          // Clear the node
+          P->Ptr = nullptr;
+          P->Size = 0;
+          // Locate the list element that holds P
+          // TODO: There can be some optimization
+          std::list<Node>::iterator Itr = NodeList.begin();
+          while (Itr != NodeList.end() && &(*Itr) != P)
+            ++Itr;
+          // splice() relinks the element without copying it, so P stays valid
+          if (Itr != NodeList.end())
+            NodeList.splice(NodeList.begin(), NodeList, Itr);
+        }
+
+        // Call device routine to try to allocate again
+        void *DevPtr = allocateFromDevice(Size, HstPtr);
+
+        if (DevPtr)
+          return DevPtr;
+      } while (1);
+    }
+
+    return nullptr;
+  }
+
+public:
+  MemoryManagerTy(DeviceTy &D) : Device(&D) {}
+
+  ~MemoryManagerTy() {
+    // TODO: There is a little issue that target plugin is destroyed before this
+    // object, therefore the memory free will not succeed.
+    for (Node &N : NodeList)
+      if (N.Ptr)
+        deleteFromDevice(N.Ptr);
+  }
+
+  void *allocate(size_t Size, void *HstPtr) {
+    assert(Size && "Size is zero");
+
+    void *DevPtr = nullptr;
+    Node *P = nullptr;
+
+    const int B = FindBucket(Size);
+
+    // Look for a free buffer of exactly the requested size. The list is only
+    // touched, iterators included, while its lock is held.
+    {
+      std::lock_guard<std::mutex> G(FreeListLocks[B]);
+      std::list<Node *> &List = FreeList[B];
+      std::list<Node *>::iterator Itr = List.begin();
+      while (Itr != List.end() && (*Itr)->Size != Size)
+        ++Itr;
+      if (Itr != List.end()) {
+        P = *Itr;
+        DevPtr = P->Ptr;
+        List.erase(Itr);
+      }
+    }
+
+    // No available one
+    if (P == nullptr) {
+      // Allocate one from device
+      DevPtr = allocateFromDevice(Size, HstPtr);
+
+      // If device is OOM, call freeAndAllocate to free some memory in FreeList
+      // and then allocate again
+      if (DevPtr == nullptr)
+        DevPtr = freeAndAllocate(Size, HstPtr);
+
+      // Something is wrong
+      // TODO: Should raise an exception?
+      if (DevPtr == nullptr)
+        return nullptr;
+
+      std::lock_guard<std::mutex> G(NodeListLock);
+      if (NodeList.empty() || NodeList.front().Size != 0) {
+        // There is no empty node in the NodeList. Create a new one.
+        NodeList.emplace_back(Size, DevPtr);
+      } else {
+        // Reuse the empty node at the front by relinking it to the back
+        NodeList.splice(NodeList.end(), NodeList, NodeList.begin());
+        NodeList.back().Size = Size;
+        NodeList.back().Ptr = DevPtr;
+      }
+      P = &NodeList.back();
+    }
+
+    {
+      std::lock_guard<std::mutex> G(MapTableLock);
+      PtrToNodeTable[DevPtr] = P;
+    }
+
+    return DevPtr;
+  }
+
+  int free(void *DevPtr) {
+    Node *P = nullptr;
+
+    // Look up the node and remove it from the map table
+    {
+      std::lock_guard<std::mutex> G(MapTableLock);
+      auto Itr = PtrToNodeTable.find(DevPtr);
+      // DevPtr is not a pointer that allocate() handed out
+      if (Itr == PtrToNodeTable.end())
+        return OFFLOAD_FAIL;
+      P = Itr->second;
+      PtrToNodeTable.erase(Itr);
+    }
+
+    // Insert the node to the free list
+    const int B = FindBucket(P->Size);
+    std::lock_guard<std::mutex> G(FreeListLocks[B]);
+    FreeList[B].push_back(P);
+
+    return OFFLOAD_SUCCESS;
+  }
+};
+
+std::vector<std::shared_ptr<MemoryManagerTy>> MemoryManagers;
+
+inline bool isValidDeviceId(int DeviceId) {
+  return DeviceId >= 0 &&
+         static_cast<size_t>(DeviceId) < MemoryManagers.size();
+}
+
+} // namespace impl
+
+void MemoryManagerTy::Init(DeviceTy &D) {
+  impl::MemoryManagers.emplace_back(std::make_shared<impl::MemoryManagerTy>(D));
+}
+
+void *MemoryManagerTy::Allocate(size_t Size, void *HstPtr, int DeviceId) {
+  assert(impl::isValidDeviceId(DeviceId) && "Invalid DeviceId");
+  return impl::MemoryManagers[DeviceId]->allocate(Size, HstPtr);
+}
+
+int MemoryManagerTy::Free(void *DevPtr, int DeviceId) {
+  assert(impl::isValidDeviceId(DeviceId) && "Invalid DeviceId");
+  return impl::MemoryManagers[DeviceId]->free(DevPtr);
+}
+
+size_t MemoryManagerTy::NumOfDevices() { return impl::MemoryManagers.size(); }
+
+MemoryManagerTy MemoryManager;
diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp
--- a/openmp/libomptarget/src/omptarget.cpp
+++ b/openmp/libomptarget/src/omptarget.cpp
@@ -14,6 +14,7 @@
 #include <omptarget.h>
 
 #include "device.h"
+#include "memory.h"
 #include "private.h"
 #include "rtl.h"
 
@@ -718,8 +719,8 @@
       TgtBaseOffset = 0;
     } else if (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE) {
       // Allocate memory for (first-)private array
-      TgtPtrBegin = Device.RTL->data_alloc(Device.RTLDeviceID,
-          arg_sizes[i], HstPtrBegin);
+      TgtPtrBegin =
+          MemoryManager.Allocate(arg_sizes[i], HstPtrBegin, Device.DeviceID);
       if (!TgtPtrBegin) {
         DP ("Data allocation for %sprivate array " DPxMOD " failed, "
             "abort target.\n",
@@ -802,7 +803,7 @@
 
   // Deallocate (first-)private arrays
   for (auto it : fpArrays) {
-    int rt = Device.RTL->data_delete(Device.RTLDeviceID, it);
+    int rt = MemoryManager.Free(it, Device.DeviceID);
     if (rt != OFFLOAD_SUCCESS) {
       DP("Deallocation of (first-)private arrays failed.\n");
       return OFFLOAD_FAIL;
diff --git a/openmp/libomptarget/src/rtl.cpp b/openmp/libomptarget/src/rtl.cpp
--- a/openmp/libomptarget/src/rtl.cpp
+++ b/openmp/libomptarget/src/rtl.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "device.h"
+#include "memory.h"
 #include "private.h"
 #include "rtl.h"
 
@@ -296,8 +297,13 @@
         Devices[start + device_id].DeviceID = start + device_id;
         // RTL local device ID
         Devices[start + device_id].RTLDeviceID = device_id;
+        // Initialize memory manager
+        MemoryManager.Init(Devices[start + device_id]);
       }
 
+      assert(Devices.size() == MemoryManager.NumOfDevices() &&
+             "Devices and MemoryManager have different size");
+
       // Initialize the index of this RTL and save it in the used RTLs.
       R.Idx = (UsedRTLs.empty())
           ? 0