Please use GitHub pull requests for new patches. Avoid migrating existing patches. See the Phabricator shutdown timeline for details.
Changeset View
Standalone View
openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
Show First 20 Lines • Show All 576 Lines • ▼ Show 20 Lines | GenericDeviceTy::getExecutionModeForKernel(StringRef Name, | ||||
// Check that the retrieved execution mode is valid. | // Check that the retrieved execution mode is valid. | ||||
if (!GenericKernelTy::isValidExecutionMode(ExecModeGlobal.getValue())) | if (!GenericKernelTy::isValidExecutionMode(ExecModeGlobal.getValue())) | ||||
return Plugin::error("Invalid execution mode %d for '%s'", | return Plugin::error("Invalid execution mode %d for '%s'", | ||||
ExecModeGlobal.getValue(), Name.data()); | ExecModeGlobal.getValue(), Name.data()); | ||||
return ExecModeGlobal.getValue(); | return ExecModeGlobal.getValue(); | ||||
} | } | ||||
Error PinnedAllocationMapTy::insertEntry(void *HstPtr, void *DevAccessiblePtr, | |||||
size_t Size, bool ExternallyLocked) { | |||||
// Insert the new entry into the map. | |||||
auto Res = Allocs.insert({HstPtr, DevAccessiblePtr, Size, ExternallyLocked}); | |||||
if (!Res.second) | |||||
return Plugin::error("Cannot insert locked buffer entry"); | |||||
// Check whether the next entry overlaps with the inserted entry. | |||||
auto It = std::next(Res.first); | |||||
if (It == Allocs.end()) | |||||
return Plugin::success(); | |||||
const EntryTy *NextEntry = &(*It); | |||||
if (intersects(NextEntry->HstPtr, NextEntry->Size, HstPtr, Size)) | |||||
return Plugin::error("Partial overlapping not allowed in locked buffers"); | |||||
return Plugin::success(); | |||||
} | |||||
Error PinnedAllocationMapTy::eraseEntry(const EntryTy &Entry) { | |||||
// Erase the existing entry. Notice this requires an additional map lookup, | |||||
// but this should not be a performance issue. Using iterators would make | |||||
// the code more difficult to read. | |||||
size_t Erased = Allocs.erase({Entry.HstPtr}); | |||||
if (!Erased) | |||||
return Plugin::error("Cannot erase locked buffer entry"); | |||||
return Plugin::success(); | |||||
} | |||||
Error PinnedAllocationMapTy::registerEntryUse(const EntryTy &Entry, | |||||
void *HstPtr, size_t Size) { | |||||
if (!contains(Entry.HstPtr, Entry.Size, HstPtr, Size)) | |||||
return Plugin::error("Partial overlapping not allowed in locked buffers"); | |||||
++Entry.References; | |||||
return Plugin::success(); | |||||
} | |||||
/// Unregister one use of an existing pinned-allocation entry.
///
/// \param Entry The entry whose reference count is decremented.
/// \returns True if this was the last user of the entry, false otherwise, or
///          an error if the entry had no registered references.
Expected<bool> PinnedAllocationMapTy::unregisterEntryUse(const EntryTy &Entry) {
  if (Entry.References == 0)
    return Plugin::error("Invalid number of references");

  // Return whether this was the last user.
  return (--Entry.References == 0);
}
Error PinnedAllocationMapTy::registerHostBuffer(void *HstPtr, | Error PinnedAllocationMapTy::registerHostBuffer(void *HstPtr, | ||||
void *DevAccessiblePtr, | void *DevAccessiblePtr, | ||||
size_t Size) { | size_t Size) { | ||||
assert(HstPtr && "Invalid pointer"); | assert(HstPtr && "Invalid pointer"); | ||||
assert(DevAccessiblePtr && "Invalid pointer"); | assert(DevAccessiblePtr && "Invalid pointer"); | ||||
assert(Size && "Invalid size"); | |||||
std::lock_guard<std::shared_mutex> Lock(Mutex); | std::lock_guard<std::shared_mutex> Lock(Mutex); | ||||
// No pinned allocation should intersect. | // No pinned allocation should intersect. | ||||
auto Res = Allocs.insert({HstPtr, DevAccessiblePtr, Size}); | const EntryTy *Entry = findIntersecting(HstPtr); | ||||
if (!Res.second) | if (Entry) | ||||
return Plugin::error("Cannot register locked buffer"); | return Plugin::error("Cannot insert entry due to an existing one"); | ||||
return Plugin::success(); | // Now insert the new entry. | ||||
return insertEntry(HstPtr, DevAccessiblePtr, Size); | |||||
} | } | ||||
Error PinnedAllocationMapTy::unregisterHostBuffer(void *HstPtr) { | Error PinnedAllocationMapTy::unregisterHostBuffer(void *HstPtr) { | ||||
assert(HstPtr && "Invalid pointer"); | assert(HstPtr && "Invalid pointer"); | ||||
std::lock_guard<std::shared_mutex> Lock(Mutex); | std::lock_guard<std::shared_mutex> Lock(Mutex); | ||||
// Find the pinned allocation starting at the host pointer address. | const EntryTy *Entry = findIntersecting(HstPtr); | ||||
auto It = Allocs.find({HstPtr}); | if (!Entry) | ||||
if (It == Allocs.end()) | |||||
return Plugin::error("Cannot find locked buffer"); | return Plugin::error("Cannot find locked buffer"); | ||||
const EntryTy &Entry = *It; | // The address in the entry should be the same we are unregistering. | ||||
if (Entry->HstPtr != HstPtr) | |||||
return Plugin::error("Unexpected host pointer in locked buffer entry"); | |||||
// Unregister from the entry. | |||||
auto LastUseOrErr = unregisterEntryUse(*Entry); | |||||
if (!LastUseOrErr) | |||||
return LastUseOrErr.takeError(); | |||||
// There should be no other references to the pinned allocation. | // There should be no other references to the pinned allocation. | ||||
if (Entry.References > 1) | if (!(*LastUseOrErr)) | ||||
return Plugin::error("The locked buffer is still being used"); | return Plugin::error("The locked buffer is still being used"); | ||||
// Remove the entry from the map. | // Erase the entry from the map. | ||||
Allocs.erase(It); | return eraseEntry(*Entry); | ||||
return Plugin::success(); | |||||
} | } | ||||
/// Lock a host buffer (or reuse an existing lock) and return its device
/// accessible pointer.
///
/// \param HstPtr Host pointer of the buffer. Must be non-null.
/// \param Size Size in bytes. Must be non-zero.
/// \returns The device accessible pointer corresponding to HstPtr (with the
///          proper offset if the buffer lies within an already locked entry),
///          or an error on failure.
Expected<void *> PinnedAllocationMapTy::lockHostBuffer(void *HstPtr,
                                                       size_t Size) {
  assert(HstPtr && "Invalid pointer");
  assert(Size && "Invalid size");

  std::lock_guard<std::shared_mutex> Lock(Mutex);

  const EntryTy *Entry = findIntersecting(HstPtr);

  if (Entry) {
    // An already registered intersecting buffer was found. Register a new use.
    if (auto Err = registerEntryUse(*Entry, HstPtr, Size))
      return Err;

    // Return the device accessible pointer with the correct offset.
    return advanceVoidPtr(Entry->DevAccessiblePtr,
                          getPtrDiff(HstPtr, Entry->HstPtr));
  }

  // No intersecting registered allocation found in the map. First, lock the
  // host buffer and retrieve the device accessible pointer.
  auto DevAccessiblePtrOrErr = Device.dataLockImpl(HstPtr, Size);
  if (!DevAccessiblePtrOrErr)
    return DevAccessiblePtrOrErr.takeError();

  // Now insert the new entry into the map.
  if (auto Err = insertEntry(HstPtr, *DevAccessiblePtrOrErr, Size))
    return Err;

  // Return the device accessible pointer.
  return *DevAccessiblePtrOrErr;
}
Error PinnedAllocationMapTy::unlockHostBuffer(void *HstPtr) { | Error PinnedAllocationMapTy::unlockHostBuffer(void *HstPtr) { | ||||
assert(HstPtr && "Invalid pointer"); | assert(HstPtr && "Invalid pointer"); | ||||
std::lock_guard<std::shared_mutex> Lock(Mutex); | std::lock_guard<std::shared_mutex> Lock(Mutex); | ||||
auto It = findIntersecting(HstPtr); | const EntryTy *Entry = findIntersecting(HstPtr); | ||||
if (It == Allocs.end()) | if (!Entry) | ||||
return Plugin::error("Cannot find locked buffer"); | return Plugin::error("Cannot find locked buffer"); | ||||
const EntryTy &Entry = *It; | // Unregister from the locked buffer. No need to do anything if there are | ||||
// Decrease the number of references. No need to do anything if there are | |||||
// others using the allocation. | // others using the allocation. | ||||
if (--Entry.References > 0) | auto LastUseOrErr = unregisterEntryUse(*Entry); | ||||
if (!LastUseOrErr) | |||||
return LastUseOrErr.takeError(); | |||||
// No need to do anything if there are others using the allocation. | |||||
if (!(*LastUseOrErr)) | |||||
return Plugin::success(); | return Plugin::success(); | ||||
// This was the last user of the allocation. Unlock the original locked memory | // This was the last user of the allocation. Unlock the original locked buffer | ||||
// buffer, which is the host pointer stored in the entry. | // if it was locked by the plugin. Do not unlock it if it was locked by an | ||||
if (auto Err = Device.dataUnlockImpl(Entry.HstPtr)) | // external entity. Unlock the buffer using the host pointer of the entry. | ||||
if (!Entry->ExternallyLocked) | |||||
if (auto Err = Device.dataUnlockImpl(Entry->HstPtr)) | |||||
return Err; | return Err; | ||||
// Remove the entry from the map. | // Erase the entry from the map. | ||||
size_t Erased = Allocs.erase(Entry); | return eraseEntry(*Entry); | ||||
if (!Erased) | } | ||||
return Plugin::error("Cannot find locked buffer"); | |||||
Error PinnedAllocationMapTy::lockMappedHostBuffer(void *HstPtr, size_t Size) { | |||||
assert(HstPtr && "Invalid pointer"); | |||||
assert(Size && "Invalid size"); | |||||
std::lock_guard<std::shared_mutex> Lock(Mutex); | |||||
// If previously registered, just register a new user on the entry. | |||||
const EntryTy *Entry = findIntersecting(HstPtr); | |||||
if (Entry) | |||||
return registerEntryUse(*Entry, HstPtr, Size); | |||||
size_t BaseSize; | |||||
void *BaseHstPtr, *BaseDevAccessiblePtr; | |||||
// Check if it was externally pinned by a vendor-specific API. | |||||
auto IsPinnedOrErr = Device.isPinnedPtrImpl(HstPtr, BaseHstPtr, | |||||
BaseDevAccessiblePtr, BaseSize); | |||||
if (!IsPinnedOrErr) | |||||
return IsPinnedOrErr.takeError(); | |||||
// If pinned, just insert the entry representing the whole pinned buffer. | |||||
if (*IsPinnedOrErr) | |||||
return insertEntry(BaseHstPtr, BaseDevAccessiblePtr, BaseSize, | |||||
/* Externally locked */ true); | |||||
// Not externally pinned. Do nothing if locking of mapped buffers is disabled. | |||||
if (!OMPX_LockMappedBuffers) | |||||
return Plugin::success(); | return Plugin::success(); | ||||
// Otherwise, lock the buffer and insert the new entry. | |||||
auto DevAccessiblePtrOrErr = Device.dataLockImpl(HstPtr, Size); | |||||
if (!DevAccessiblePtrOrErr) | |||||
return DevAccessiblePtrOrErr.takeError(); | |||||
return insertEntry(HstPtr, *DevAccessiblePtrOrErr, Size); | |||||
} | |||||
Error PinnedAllocationMapTy::unlockUnmappedHostBuffer(void *HstPtr) { | |||||
assert(HstPtr && "Invalid pointer"); | |||||
std::lock_guard<std::shared_mutex> Lock(Mutex); | |||||
// Check whether there is any intersecting entry. | |||||
const EntryTy *Entry = findIntersecting(HstPtr); | |||||
// No entry but automatic locking of mapped buffers is disabled, so | |||||
// nothing to do. | |||||
if (!Entry && !OMPX_LockMappedBuffers) | |||||
return Plugin::success(); | |||||
// No entry, but the automatic locking is enabled, so this is an error. | |||||
if (!Entry) | |||||
return Plugin::error("Locked buffer not found"); | |||||
// There is entry, so unregister a user and check whether it was the last one. | |||||
auto LastUseOrErr = unregisterEntryUse(*Entry); | |||||
if (!LastUseOrErr) | |||||
return LastUseOrErr.takeError(); | |||||
// If it is not the last one, there is nothing to do. | |||||
if (!(*LastUseOrErr)) | |||||
return Plugin::success(); | |||||
// Otherwise, if it was the last and the buffer was locked by the plugin, | |||||
// unlock it. | |||||
if (!Entry->ExternallyLocked) | |||||
if (auto Err = Device.dataUnlockImpl(Entry->HstPtr)) | |||||
return Err; | |||||
// Finally erase the entry from the map. | |||||
return eraseEntry(*Entry); | |||||
} | } | ||||
Error GenericDeviceTy::synchronize(__tgt_async_info *AsyncInfo) { | Error GenericDeviceTy::synchronize(__tgt_async_info *AsyncInfo) { | ||||
if (!AsyncInfo || !AsyncInfo->Queue) | if (!AsyncInfo || !AsyncInfo->Queue) | ||||
return Plugin::error("Invalid async info queue"); | return Plugin::error("Invalid async info queue"); | ||||
return synchronizeImpl(*AsyncInfo); | return synchronizeImpl(*AsyncInfo); | ||||
} | } | ||||
▲ Show 20 Lines • Show All 62 Lines • ▼ Show 20 Lines | if (Kind == TARGET_ALLOC_HOST) | ||||
if (auto Err = PinnedAllocs.unregisterHostBuffer(TgtPtr)) | if (auto Err = PinnedAllocs.unregisterHostBuffer(TgtPtr)) | ||||
return Err; | return Err; | ||||
return Plugin::success(); | return Plugin::success(); | ||||
} | } | ||||
Error GenericDeviceTy::dataSubmit(void *TgtPtr, const void *HstPtr, | Error GenericDeviceTy::dataSubmit(void *TgtPtr, const void *HstPtr, | ||||
int64_t Size, __tgt_async_info *AsyncInfo) { | int64_t Size, __tgt_async_info *AsyncInfo) { | ||||
auto Err = Plugin::success(); | auto Err = Plugin::success(); | ||||
ye-luo: I don't understand this part of code.
The implementation actually lock the host buffer instead… | |||||
While it is mapped, we want to keep it (by default) pinned. The escape hatch (env var) is missing though. jdoerfert: While it is mapped, we want to keep it (by default) pinned. The escape hatch (env var) is… | |||||
Unconditionally pinning mapped host memory should be an option to opt-in rather than default. ye-luo: Unconditionally pinning mapped host memory should be an option to opt-in rather than default. | |||||
Mapping scalars as firstprivate or as tofrom? The former would not go through this mechanism. The latter makes reasonable sense to pin the page, no? I agree we need an env var, default is the question. jdoerfert: Mapping scalars as firstprivate or as tofrom? The former would not go through this mechanism. | |||||
I was referring to map(tofrom) scalars. firstprivte is not considered a map technically. I don't think you want to pay the cost of pinning 10 pages due to 10 scalars. Pinning is very expensive (interacting with OS). ye-luo: I was referring to `map(tofrom)` scalars. firstprivte is not considered a map technically. I… | |||||
AsyncInfoWrapperTy AsyncInfoWrapper(Err, *this, AsyncInfo); | AsyncInfoWrapperTy AsyncInfoWrapper(Err, *this, AsyncInfo); | ||||
Err = dataSubmitImpl(TgtPtr, HstPtr, Size, AsyncInfoWrapper); | Err = dataSubmitImpl(TgtPtr, HstPtr, Size, AsyncInfoWrapper); | ||||
return Err; | return Err; | ||||
} | } | ||||
Error GenericDeviceTy::dataRetrieve(void *HstPtr, const void *TgtPtr, | Error GenericDeviceTy::dataRetrieve(void *HstPtr, const void *TgtPtr, | ||||
int64_t Size, __tgt_async_info *AsyncInfo) { | int64_t Size, __tgt_async_info *AsyncInfo) { | ||||
auto Err = Plugin::success(); | auto Err = Plugin::success(); | ||||
▲ Show 20 Lines • Show All 333 Lines • ▼ Show 20 Lines | if (Err) { | ||||
REPORT("Failure to unlock memory %p: %s\n", Ptr, | REPORT("Failure to unlock memory %p: %s\n", Ptr, | ||||
toString(std::move(Err)).data()); | toString(std::move(Err)).data()); | ||||
return OFFLOAD_FAIL; | return OFFLOAD_FAIL; | ||||
} | } | ||||
return OFFLOAD_SUCCESS; | return OFFLOAD_SUCCESS; | ||||
} | } | ||||
int32_t __tgt_rtl_data_notify_mapped(int32_t DeviceId, void *HstPtr, | |||||
int64_t Size) { | |||||
auto Err = Plugin::get().getDevice(DeviceId).notifyDataMapped(HstPtr, Size); | |||||
if (Err) { | |||||
REPORT("Failure to notify data mapped %p: %s\n", HstPtr, | |||||
toString(std::move(Err)).data()); | |||||
return OFFLOAD_FAIL; | |||||
} | |||||
return OFFLOAD_SUCCESS; | |||||
} | |||||
int32_t __tgt_rtl_data_notify_unmapped(int32_t DeviceId, void *HstPtr) { | |||||
auto Err = Plugin::get().getDevice(DeviceId).notifyDataUnmapped(HstPtr); | |||||
if (Err) { | |||||
REPORT("Failure to notify data unmapped %p: %s\n", HstPtr, | |||||
toString(std::move(Err)).data()); | |||||
return OFFLOAD_FAIL; | |||||
} | |||||
return OFFLOAD_SUCCESS; | |||||
} | |||||
int32_t __tgt_rtl_data_submit(int32_t DeviceId, void *TgtPtr, void *HstPtr, | int32_t __tgt_rtl_data_submit(int32_t DeviceId, void *TgtPtr, void *HstPtr, | ||||
int64_t Size) { | int64_t Size) { | ||||
return __tgt_rtl_data_submit_async(DeviceId, TgtPtr, HstPtr, Size, | return __tgt_rtl_data_submit_async(DeviceId, TgtPtr, HstPtr, Size, | ||||
/* AsyncInfoPtr */ nullptr); | /* AsyncInfoPtr */ nullptr); | ||||
} | } | ||||
int32_t __tgt_rtl_data_submit_async(int32_t DeviceId, void *TgtPtr, | int32_t __tgt_rtl_data_submit_async(int32_t DeviceId, void *TgtPtr, | ||||
void *HstPtr, int64_t Size, | void *HstPtr, int64_t Size, | ||||
▲ Show 20 Lines • Show All 201 Lines • Show Last 20 Lines |
I don't understand this part of the code.
The implementation actually locks the host buffer instead of just notifying.