diff --git a/openmp/docs/SupportAndFAQ.rst b/openmp/docs/SupportAndFAQ.rst --- a/openmp/docs/SupportAndFAQ.rst +++ b/openmp/docs/SupportAndFAQ.rst @@ -109,3 +109,12 @@ `extensions to the OpenMP begin/end declare variant context selector `__ that are exposed through LLVM/Clang to the user as well. + +Q: What is a way to debug errors from mapping memory to a target device? +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +An experimental way to debug these errors is to use :ref:`remote process +offloading <remote_offloading_plugin>`. +By using ``libomptarget.rtl.rpc.so`` and ``openmp-offloading-server``, it is +possible to explicitly perform memory transfers between processes on the host +CPU and run sanitizers while doing so in order to catch these errors. \ No newline at end of file diff --git a/openmp/docs/design/Runtimes.rst b/openmp/docs/design/Runtimes.rst --- a/openmp/docs/design/Runtimes.rst +++ b/openmp/docs/design/Runtimes.rst @@ -273,6 +273,62 @@ .. _device_runtime: + +.. _remote_offloading_plugin: + +Remote Offloading Plugin: +^^^^^^^^^^^^^^^^^^^^^^^^^ + +The remote offloading plugin permits the execution of OpenMP target regions +on devices in remote hosts in addition to the devices connected to the local +host. All target devices on the remote host will be exposed to the +application as if they were local devices, that is, the remote host CPU or +its GPUs can be offloaded to with the appropriate device number. If the +server is running on the same host, each device may be identified twice: +once through the device plugins and once through the device plugins that the +server application has access to. + +This plugin consists of ``libomptarget.rtl.rpc.so`` and +``openmp-offloading-server`` which should be running on the (remote) host. The +server application does not have to be running on a remote host, and can +instead be used on the same host in order to debug memory mapping during offloading. 
+These are implemented via gRPC/protobuf so these libraries are required to +build and use this plugin. The server must also have access to the necessary +target-specific plugins in order to perform the offloading. + +Due to the experimental nature of this plugin, the CMake variable +``LIBOMPTARGET_ENABLE_EXPERIMENTAL_REMOTE_PLUGIN`` must be set in order to +build this plugin. For example, the rpc plugin is not designed to be +thread-safe, the server cannot concurrently handle offloading from multiple +applications at once (it is synchronous) and will terminate after a single +execution. Note that ``openmp-offloading-server`` is unable to +remote offload onto a remote host itself and will error out if this is attempted. + +Remote offloading is configured via environment variables at runtime of the OpenMP application: + * ``LIBOMPTARGET_RPC_ADDRESS=
:`` + * ``LIBOMPTARGET_RPC_ALLOCATOR_MAX=`` + * ``LIBOMPTARGET_RPC_BLOCK_SIZE=`` + * ``LIBOMPTARGET_RPC_LATENCY=`` + +LIBOMPTARGET_RPC_ADDRESS +"""""""""""""""""""""""" +The address and port at which the server is running. This needs to be set for +the server and the application; the default is ``0.0.0.0:50051``. A single +OpenMP executable can offload onto multiple remote hosts by setting this to +comma-separated values of the addresses. + +LIBOMPTARGET_RPC_ALLOCATOR_MAX +"""""""""""""""""""""""""""""" +After allocating this size, the protobuf allocator will clear. This can be set for both endpoints. + +LIBOMPTARGET_RPC_BLOCK_SIZE +""""""""""""""""""""""""""" +This is the maximum size of a single message while streaming data transfers between the two endpoints and can be set for both endpoints. + +LIBOMPTARGET_RPC_LATENCY +"""""""""""""""""""""""" +This is the maximum amount of time the client will wait for a response from the server. + LLVM/OpenMP Target Device Runtime (``libomptarget-ARCH-SUBARCH.bc``) -------------------------------------------------------------------- diff --git a/openmp/libomptarget/plugins/CMakeLists.txt b/openmp/libomptarget/plugins/CMakeLists.txt --- a/openmp/libomptarget/plugins/CMakeLists.txt +++ b/openmp/libomptarget/plugins/CMakeLists.txt @@ -74,6 +74,7 @@ add_subdirectory(ppc64le) add_subdirectory(ve) add_subdirectory(x86_64) +add_subdirectory(remote) # Make sure the parent scope can see the plugins that will be created. 
set(LIBOMPTARGET_SYSTEM_TARGETS "${LIBOMPTARGET_SYSTEM_TARGETS}" PARENT_SCOPE) diff --git a/openmp/libomptarget/plugins/exports b/openmp/libomptarget/plugins/exports --- a/openmp/libomptarget/plugins/exports +++ b/openmp/libomptarget/plugins/exports @@ -19,6 +19,8 @@ __tgt_rtl_run_target_region; __tgt_rtl_run_target_region_async; __tgt_rtl_synchronize; + __tgt_rtl_register_lib; + __tgt_rtl_unregister_lib; local: *; }; diff --git a/openmp/libomptarget/plugins/remote/CMakeLists.txt b/openmp/libomptarget/plugins/remote/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/plugins/remote/CMakeLists.txt @@ -0,0 +1,55 @@ +##===----------------------------------------------------------------------===## +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +##===----------------------------------------------------------------------===## +# +# Build a plugin (client) and server for remote offloading. 
+# +##===----------------------------------------------------------------------===# + +cmake_minimum_required(VERSION 3.15) + +if (NOT(CMAKE_SYSTEM_NAME MATCHES "Linux")) + libomptarget_say("Not building remote offloading plugin: only support Linux hosts.") + return() +endif() + +if (NOT(LIBOMPTARGET_ENABLE_EXPERIMENTAL_REMOTE_PLUGIN)) + return() +endif() + +find_package(Protobuf) +find_package(gRPC CONFIG) + +find_program(PROTOC protoc) +find_program(GRPC_CPP_PLUGIN grpc_cpp_plugin) + +# Only generate sources and descend into src/ and server/ when gRPC, protobuf +# and both code generators were found: the server target compiles the +# generated sources and links grpc++/protobuf, so it cannot build otherwise. +if (Protobuf_FOUND AND gRPC_FOUND AND PROTOC AND GRPC_CPP_PLUGIN) + libomptarget_say("Building remote offloading plugin.") + set(directory "${CMAKE_BINARY_DIR}/include/openmp/libomptarget/plugins/remote/") + file(MAKE_DIRECTORY ${directory}) + + # Run each generator in its own execute_process call (several COMMANDs in one + # call are chained as a concurrent pipeline) and invoke the protoc binary that + # find_program located rather than whatever "protoc" resolves to on PATH. + execute_process( + COMMAND ${PROTOC} --cpp_out=${directory} -I ${CMAKE_CURRENT_SOURCE_DIR}/include ${CMAKE_CURRENT_SOURCE_DIR}/include/openmp.proto + ) + execute_process( + COMMAND ${PROTOC} --grpc_out=${directory} -I ${CMAKE_CURRENT_SOURCE_DIR}/include ${CMAKE_CURRENT_SOURCE_DIR}/include/openmp.proto --plugin=protoc-gen-grpc=${GRPC_CPP_PLUGIN} + ) + + # Generated sources and the directory holding the generated headers. + set(GRPC_SRC_FILES + ${directory}/openmp.grpc.pb.cc + ${directory}/openmp.pb.cc + ) + + set(GRPC_INCLUDE_DIR + ${directory} + ) + + # Hand-written headers and shared sources used by the subdirectories. + set(RPC_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include/) + set(RPC_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/lib/) + + add_subdirectory(src) + add_subdirectory(server) +else() + libomptarget_say("Not building remote offloading plugin: required libraries were not found.") +endif() diff --git a/openmp/libomptarget/plugins/remote/include/Utils.h b/openmp/libomptarget/plugins/remote/include/Utils.h new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/plugins/remote/include/Utils.h @@ -0,0 +1,114 @@ +//===----------------- Utils.h - Utilities for Remote RTL -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Utilities for data transfer through protobuf and debugging. +// +//===----------------------------------------------------------------------===// + +#ifndef UTILS_H +#define UTILS_H + +#include "Debug.h" +#include "omptarget.h" +#include "openmp.grpc.pb.h" +#include "openmp.pb.h" +#include "rtl.h" +#include + +#define CLIENT_DBG(...) \ + { \ + if (DebugLevel > 0) { \ + fprintf(stderr, "[[Client]] --> "); \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, "\n"); \ + } \ + } + +#define SERVER_DBG(...) \ + { \ + if (DebugLevel > 0) { \ + fprintf(stderr, "[[Server]] --> "); \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, "\n"); \ + } \ + } + +namespace RemoteOffloading { + +using namespace openmp::libomptarget::remote; + +using openmp::libomptarget::remote::DeviceOffloadEntry; +using openmp::libomptarget::remote::TargetBinaryDescription; +using openmp::libomptarget::remote::TargetOffloadEntry; +using openmp::libomptarget::remote::TargetTable; + +struct RPCConfig { + std::vector ServerAddresses; + uint64_t MaxSize; + uint64_t BlockSize; + RPCConfig() { + ServerAddresses = {"0.0.0.0:50051"}; + MaxSize = 1 << 30; + BlockSize = 1 << 20; + } +}; + +/// Helper function to parse common environment variables between client/server +void parseEnvironment(RPCConfig &Config); + +/// Loads a target binary description into protobuf. +void loadTargetBinaryDescription(const __tgt_bin_desc *Desc, + TargetBinaryDescription &Request); + +/// Unload a target binary description from protobuf. The map is used to keep +/// track of already copied device images. 
+void unloadTargetBinaryDescription( + const TargetBinaryDescription *Request, __tgt_bin_desc *Desc, + std::unordered_map + &HostToRemoteDeviceImage); + +/// Frees argument as constructed by loadTargetBinaryDescription +void freeTargetBinaryDescription(__tgt_bin_desc *Desc); + +/// Copies from TargetOffloadEntry protobuf to a tgt_bin_desc during unloading. +void copyOffloadEntry(const TargetOffloadEntry &EntryResponse, + __tgt_offload_entry *Entry); + +/// Copies from tgt_bin_desc into TargetOffloadEntry protobuf during loading. +void copyOffloadEntry(const __tgt_offload_entry *Entry, + TargetOffloadEntry *EntryResponse); + +/// Shallow copy of offload entry from tgt_bin_desc to TargetOffloadEntry +/// during loading. +void shallowCopyOffloadEntry(const __tgt_offload_entry *Entry, + TargetOffloadEntry *EntryResponse); + +/// Copies DeviceOffloadEntries into table during unloading. +void copyOffloadEntry(const DeviceOffloadEntry &EntryResponse, + __tgt_offload_entry *Entry); + +/// Loads tgt_target_table into a TargetTable protobuf message. +void loadTargetTable(__tgt_target_table *Table, TargetTable &TableResponse, + __tgt_device_image *Image); + +/// Unloads from a target_table from protobuf. 
+void unloadTargetTable( + TargetTable &TableResponse, __tgt_target_table *Table, + std::unordered_map &HostToRemoteTargetTableMap); + +/// Frees argument as constructed by unloadTargetTable +void freeTargetTable(__tgt_target_table *Table); + +void dump(const void *Start, const void *End); +void dump(__tgt_offload_entry *Entry); +void dump(TargetOffloadEntry Entry); +void dump(__tgt_target_table *Table); +void dump(__tgt_device_image *Image); +} // namespace RemoteOffloading + +#endif diff --git a/openmp/libomptarget/plugins/remote/include/openmp.proto b/openmp/libomptarget/plugins/remote/include/openmp.proto new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/plugins/remote/include/openmp.proto @@ -0,0 +1,164 @@ +syntax = "proto3"; + +package openmp.libomptarget.remote; +option cc_enable_arenas = true; + +service RemoteOffload { + rpc Shutdown(Null) returns (I32) {} + + rpc RegisterLib(TargetBinaryDescription) returns (I32) {} + rpc UnregisterLib(Pointer) returns (I32) {} + + rpc IsValidBinary(TargetDeviceImagePtr) returns (I32) {} + rpc GetNumberOfDevices(Null) returns (I32) {} + + rpc InitDevice(I32) returns (I32) {} + rpc InitRequires(I64) returns (I32) {} + + rpc LoadBinary(Binary) returns (TargetTable) {} + rpc Synchronize(SynchronizeDevice) returns (I32) {} + + rpc DataAlloc(AllocData) returns (Pointer) {} + rpc DataDelete(DeleteData) returns (I32) {} + + rpc DataSubmitAsync(stream SubmitDataAsync) returns (I32) {} + rpc DataRetrieveAsync(RetrieveDataAsync) returns (stream Data) {} + + rpc IsDataExchangeable(DevicePair) returns (I32) {} + rpc DataExchangeAsync(ExchangeDataAsync) returns (I32) {} + + rpc RunTargetRegionAsync(TargetRegionAsync) returns (I32) {} + rpc RunTargetTeamRegionAsync(TargetTeamRegionAsync) returns (I32) {} +} + +message Null {} + +message Pointer { uint64 number = 1; } + +message I32 { int32 number = 1; } + +message I64 { int64 number = 1; } + +message DevicePair { + int32 src_dev_id = 1; + int32 dst_dev_id = 2; +} + +message 
Binary { + uint64 image_ptr = 1; + int32 device_id = 2; +} + +message TargetOffloadEntry { + bytes data = 1; + string name = 2; + int32 flags = 3; + int32 reserved = 4; +} + +message DeviceOffloadEntry { + string name = 1; + uint64 addr = 2; + int32 flags = 3; + int32 reserved = 4; + int32 size = 5; +} + +message TargetTable { + repeated DeviceOffloadEntry entries = 1; + repeated uint64 entry_ptrs = 2; +} + +message TargetDeviceImagePtr { + uint64 image_ptr = 1; + repeated uint64 entry_ptrs = 2; +} + +message TargetDeviceImage { + bytes binary = 1; + repeated TargetOffloadEntry entries = 2; +} + +message ImagePtrs { + uint64 img_ptr = 1; + repeated uint64 entry_ptrs = 2; +} + +message TargetBinaryDescription { + repeated ImagePtrs image_ptrs = 1; + repeated TargetOffloadEntry entries = 2; + repeated TargetDeviceImage images = 3; + repeated uint64 entry_ptrs = 4; + uint64 bin_ptr = 5; +} + +message SynchronizeDevice { + uint64 queue_ptr = 1; + int32 device_id = 2; +} + +message AllocData { + uint64 size = 1; + uint64 hst_ptr = 2; + int32 device_id = 3; +} + +message SubmitDataAsync { + bytes data = 1; + uint64 hst_ptr = 2; + uint64 tgt_ptr = 3; + uint64 queue_ptr = 4; + uint64 start = 5; + uint64 size = 6; + int32 device_id = 7; +} + +message RetrieveDataAsync { + uint64 hst_ptr = 1; + uint64 tgt_ptr = 2; + uint64 size = 3; + uint64 queue_ptr = 4; + int32 device_id = 5; +} + +message Data { + bytes data = 1; + uint64 start = 2; + uint64 size = 3; + int32 ret = 4; +} + +message ExchangeDataAsync { + uint64 src_dev_id = 1; + uint64 src_ptr = 2; + uint64 dst_dev_id = 3; + uint64 dst_ptr = 4; + uint64 queue_ptr = 5; + uint64 size = 6; +} + +message DeleteData { + uint64 tgt_ptr = 1; + int32 device_id = 2; +} + +message TargetRegionAsync { + repeated uint64 tgt_args = 1; + repeated int64 tgt_offsets = 2; + uint64 tgt_entry_ptr = 3; + uint64 queue_ptr = 4; + int32 device_id = 5; + int32 arg_num = 6; +} + +message TargetTeamRegionAsync { + repeated uint64 tgt_args = 1; + 
repeated int64 tgt_offsets = 2; + uint64 tgt_entry_ptr = 3; + uint64 loop_tripcount = 4; + uint64 queue_ptr = 5; + int32 device_id = 6; + int32 arg_num = 7; + int32 team_num = 8; + int32 thread_limit = 9; +} diff --git a/openmp/libomptarget/plugins/remote/lib/Utils.cpp b/openmp/libomptarget/plugins/remote/lib/Utils.cpp new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/plugins/remote/lib/Utils.cpp @@ -0,0 +1,316 @@ +//===---------------- Utils.cpp - Utilities for Remote RTL ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Utilities for data movement and debugging. +// +//===----------------------------------------------------------------------===// + +#include "Utils.h" +#include "omptarget.h" + +namespace RemoteOffloading { +void parseEnvironment(RPCConfig &Config) { + // TODO: Error handle for incorrect inputs + if (const char *Env = std::getenv("LIBOMPTARGET_RPC_ADDRESS")) { + Config.ServerAddresses.clear(); + std::string AddressString = Env; + const std::string Delimiter = ","; + + size_t Pos = 0; + std::string Token; + while ((Pos = AddressString.find(Delimiter)) != std::string::npos) { + Token = AddressString.substr(0, Pos); + Config.ServerAddresses.push_back(Token); + AddressString.erase(0, Pos + Delimiter.length()); + } + Config.ServerAddresses.push_back(AddressString); + } + if (const char *Env = std::getenv("LIBOMPTARGET_RPC_ALLOCATOR_MAX")) + Config.MaxSize = std::stoi(Env); + if (const char *Env = std::getenv("LIBOMPTARGET_RPC_BLOCK_SIZE")) + Config.BlockSize = std::stoi(Env); +} + +void loadTargetBinaryDescription(const __tgt_bin_desc *Desc, + TargetBinaryDescription &Request) { + // Keeps track of entries which have already been deep copied. 
+ std::vector DeepCopiedEntryAddrs; + + // Copy Global Offload Entries + for (auto *CurEntry = Desc->HostEntriesBegin; + CurEntry != Desc->HostEntriesEnd; CurEntry++) { + auto *NewEntry = Request.add_entries(); + copyOffloadEntry(CurEntry, NewEntry); + + // Copy the pointer of the offload entry of the image into the Request + Request.add_entry_ptrs((uint64_t)CurEntry); + DeepCopiedEntryAddrs.push_back(CurEntry); + } + + // Copy Device Images and Device Offload Entries + __tgt_device_image *CurImage = Desc->DeviceImages; + for (auto I = 0; I < Desc->NumDeviceImages; I++, CurImage++) { + auto *Image = Request.add_images(); + auto Size = (char *)CurImage->ImageEnd - (char *)CurImage->ImageStart; + Image->set_binary(CurImage->ImageStart, Size); + + // Copy the pointer of the image into the Request + auto *NewImagePtr = Request.add_image_ptrs(); + NewImagePtr->set_img_ptr((uint64_t)CurImage->ImageStart); + + // Copy Device Offload Entries + for (auto *CurEntry = CurImage->EntriesBegin; + CurEntry != CurImage->EntriesEnd; CurEntry++) { + auto *NewEntry = Image->add_entries(); + + auto Entry = std::find(DeepCopiedEntryAddrs.begin(), + DeepCopiedEntryAddrs.end(), CurEntry); + if (Entry != DeepCopiedEntryAddrs.end()) { + // Offload entry has already been loaded + shallowCopyOffloadEntry(CurEntry, NewEntry); + } else { // Offload Entry has not been loaded into the Request + copyOffloadEntry(CurEntry, NewEntry); + DeepCopiedEntryAddrs.push_back(CurEntry); + } + + // Copy the pointer of the offload entry of the image into the Request + NewImagePtr->add_entry_ptrs((uint64_t)CurEntry); + } + } +} + +void unloadTargetBinaryDescription( + const TargetBinaryDescription *Request, __tgt_bin_desc *Desc, + std::unordered_map + &HostToRemoteDeviceImage) { + std::unordered_map CopiedOffloadEntries; + Desc->NumDeviceImages = Request->images_size(); + Desc->DeviceImages = new __tgt_device_image[Desc->NumDeviceImages]; + + if (Request->entries_size()) + Desc->HostEntriesBegin = new 
__tgt_offload_entry[Request->entries_size()]; + else { + Desc->HostEntriesBegin = nullptr; + Desc->HostEntriesEnd = nullptr; + } + + // Copy Global Offload Entries + __tgt_offload_entry *CurEntry = Desc->HostEntriesBegin; + for (int i = 0; i < Request->entries_size(); i++) { + copyOffloadEntry(Request->entries()[i], CurEntry); + CopiedOffloadEntries[(void *)Request->entry_ptrs()[i]] = CurEntry; + CurEntry++; + } + Desc->HostEntriesEnd = CurEntry; + + // Copy Device Images and Device Offload Entries + __tgt_device_image *CurImage = Desc->DeviceImages; + auto ImageItr = Request->image_ptrs().begin(); + for (auto Image : Request->images()) { + // Copy Device Offload Entries + auto *CurEntry = Desc->HostEntriesBegin; + bool Found = false; + + if (!Desc->HostEntriesBegin) { + CurImage->EntriesBegin = nullptr; + CurImage->EntriesEnd = nullptr; + } + + for (int i = 0; i < Image.entries_size(); i++) { + auto TgtEntry = + CopiedOffloadEntries.find((void *)Request->entry_ptrs()[i]); + if (TgtEntry != CopiedOffloadEntries.end()) { + if (!Found) + CurImage->EntriesBegin = CurEntry; + + Found = true; + if (Found) { + CurImage->EntriesEnd = CurEntry + 1; + } + } else { + Found = false; + copyOffloadEntry(Image.entries()[i], CurEntry); + CopiedOffloadEntries[(void *)(Request->entry_ptrs()[i])] = CurEntry; + } + CurEntry++; + } + + // Copy Device Image + CurImage->ImageStart = new uint8_t[Image.binary().size()]; + memcpy(CurImage->ImageStart, + static_cast(Image.binary().data()), + Image.binary().size()); + CurImage->ImageEnd = + (void *)((char *)CurImage->ImageStart + Image.binary().size()); + + HostToRemoteDeviceImage[(void *)ImageItr->img_ptr()] = CurImage; + CurImage++; + ImageItr++; + } +} + +/// Releases the device image buffers, the image array and the host offload +/// entries (names and data) owned by Desc. +void freeTargetBinaryDescription(__tgt_bin_desc *Desc) { + __tgt_device_image *CurImage = Desc->DeviceImages; + // The image buffers are allocated with new uint8_t[] in + // unloadTargetBinaryDescription, so they must be released through the same + // element type. + for (auto I = 0; I < Desc->NumDeviceImages; I++, CurImage++) + delete[](uint8_t *) CurImage->ImageStart; + + delete[] Desc->DeviceImages; + + for (auto *Entry = 
Desc->HostEntriesBegin; Entry != Desc->HostEntriesEnd; + Entry++) { + free(Entry->name); + free(Entry->addr); + } + + delete[] Desc->HostEntriesBegin; +} + +void freeTargetTable(__tgt_target_table *Table) { + for (auto *Entry = Table->EntriesBegin; Entry != Table->EntriesEnd; Entry++) + free(Entry->name); + + delete[] Table->EntriesBegin; +} + +void loadTargetTable(__tgt_target_table *Table, TargetTable &TableResponse, + __tgt_device_image *Image) { + auto *ImageEntry = Image->EntriesBegin; + for (__tgt_offload_entry *CurEntry = Table->EntriesBegin; + CurEntry != Table->EntriesEnd; CurEntry++, ImageEntry++) { + // TODO: This can probably be trimmed substantially. + auto *NewEntry = TableResponse.add_entries(); + NewEntry->set_name(CurEntry->name); + NewEntry->set_addr((uint64_t)CurEntry->addr); + NewEntry->set_flags(CurEntry->flags); + NewEntry->set_reserved(CurEntry->reserved); + NewEntry->set_size(CurEntry->size); + TableResponse.add_entry_ptrs((int64_t)CurEntry); + } +} + +void unloadTargetTable( + TargetTable &TableResponse, __tgt_target_table *Table, + std::unordered_map &HostToRemoteTargetTableMap) { + Table->EntriesBegin = new __tgt_offload_entry[TableResponse.entries_size()]; + + auto *CurEntry = Table->EntriesBegin; + for (int i = 0; i < TableResponse.entries_size(); i++) { + copyOffloadEntry(TableResponse.entries()[i], CurEntry); + HostToRemoteTargetTableMap[CurEntry->addr] = + (void *)TableResponse.entry_ptrs()[i]; + CurEntry++; + } + Table->EntriesEnd = CurEntry; +} + +/// Fills Entry from a TargetOffloadEntry message. The name and the data are +/// copied into malloc'ed buffers, which freeTargetBinaryDescription releases +/// with free(). +void copyOffloadEntry(const TargetOffloadEntry &EntryResponse, + __tgt_offload_entry *Entry) { + Entry->name = strdup(EntryResponse.name().c_str()); + Entry->reserved = EntryResponse.reserved(); + Entry->flags = EntryResponse.flags(); + // The payload is raw bytes and may contain embedded zero bytes, so copy it + // by length; strdup() would stop at the first zero byte while size still + // reported the full length. A trailing NUL is kept so that an empty payload + // still yields a distinct, non-null buffer as it did with strdup(). + const auto &Data = EntryResponse.data(); + char *DataCopy = (char *)malloc(Data.size() + 1); + if (DataCopy) { + memcpy(DataCopy, Data.data(), Data.size()); + DataCopy[Data.size()] = '\0'; + } + Entry->addr = DataCopy; + Entry->size = Data.size(); +} + +void copyOffloadEntry(const DeviceOffloadEntry &EntryResponse, + __tgt_offload_entry *Entry) { + Entry->name = strdup(EntryResponse.name().c_str()); + 
Entry->reserved = EntryResponse.reserved(); + Entry->flags = EntryResponse.flags(); + Entry->addr = (void *)EntryResponse.addr(); + Entry->size = EntryResponse.size(); +} + +/// We shallow copy with just the name because it is a convenient identifier, we +/// do actually just match off of the address. +void shallowCopyOffloadEntry(const __tgt_offload_entry *Entry, + TargetOffloadEntry *EntryResponse) { + EntryResponse->set_name(Entry->name); +} + +void copyOffloadEntry(const __tgt_offload_entry *Entry, + TargetOffloadEntry *EntryResponse) { + shallowCopyOffloadEntry(Entry, EntryResponse); + EntryResponse->set_reserved(Entry->reserved); + EntryResponse->set_flags(Entry->flags); + EntryResponse->set_data(Entry->addr, Entry->size); +} + +/// Dumps the memory region from Start to End in order to debug memory transfer +/// errors within the plugin. The region is printed to stdout as rows of 16 +/// hexadecimal bytes, each row followed by the same bytes as text with '.' +/// standing in for non-printable characters. +void dump(const void *Start, const void *End) { + // Start out empty so that a zero-length region prints an empty text column + // instead of reading an uninitialized buffer. + unsigned char Line[17] = {0}; + const unsigned char *PrintCharacter = (const unsigned char *)Start; + // Length of the region in bytes. The difference has to be taken between + // byte pointers; subtracting int pointers would cover only 1/sizeof(int) of + // the region. + const unsigned int Length = + (unsigned int)((const unsigned char *)End - PrintCharacter); + + unsigned int I = 0; + for (; I < Length; I++) { + if ((I % 16) == 0) { + // A new row starts: flush the text column of the previous row first. + if (I != 0) + printf(" %s\n", Line); + + printf(" %04x ", I); + } + + printf(" %02x", PrintCharacter[I]); + + if ((PrintCharacter[I] < 0x20) || (PrintCharacter[I] > 0x7e)) + Line[I % 16] = '.'; + else + Line[I % 16] = PrintCharacter[I]; + + Line[(I % 16) + 1] = '\0'; + } + + // Pad the last, partially filled row so the text column lines up. + while ((I % 16) != 0) { + printf(" "); + I++; + } + + printf(" %s\n", Line); +} + +void dump(__tgt_offload_entry *Entry) { + fprintf(stderr, "Entry (%p):\n", (void *)Entry); + fprintf(stderr, " Name: %s (%p)\n", Entry->name, (void *)&Entry->name); + fprintf(stderr, " Reserved: %d (%p)\n", Entry->reserved, + (void *)&Entry->reserved); + fprintf(stderr, " Flags: %d (%p)\n", Entry->flags, (void *)&Entry->flags); + fprintf(stderr, " Addr: %p\n", Entry->addr); + fprintf(stderr, " Size: %lu\n", Entry->size); +} + +void dump(__tgt_target_table *Table) { + for (auto *CurEntry = Table->EntriesBegin; CurEntry != 
Table->EntriesEnd; + CurEntry++) + dump(CurEntry); +} + +void dump(TargetOffloadEntry Entry) { + fprintf(stderr, "Entry: "); + fprintf(stderr, " %s\n", Entry.name().c_str()); + fprintf(stderr, " %d\n", Entry.reserved()); + fprintf(stderr, " %d\n", Entry.flags()); + fprintf(stderr, " %ld\n", Entry.data().size()); + dump(static_cast(Entry.data().data()), + static_cast((Entry.data().c_str() + Entry.data().size()))); +} + +void dump(__tgt_device_image *Image) { + dump(Image->ImageStart, Image->ImageEnd); + __tgt_offload_entry *EntryItr = Image->EntriesBegin; + for (; EntryItr != Image->EntriesEnd; EntryItr++) + dump(EntryItr); +} + +void dump(std::unordered_map &Map) { + fprintf(stderr, "Host to Remote Entry Map:\n"); + for (auto Entry : Map) + fprintf(stderr, " Host (%p) -> Tgt (%p): Addr((%p))\n", Entry.first, + (void *)Entry.second, (void *)Entry.second->addr); +} +} // namespace RemoteOffloading \ No newline at end of file diff --git a/openmp/libomptarget/plugins/remote/server/CMakeLists.txt b/openmp/libomptarget/plugins/remote/server/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/plugins/remote/server/CMakeLists.txt @@ -0,0 +1,30 @@ +##===----------------------------------------------------------------------===## +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +##===----------------------------------------------------------------------===## +# +# Build server for remote offloading. 
+# +##===----------------------------------------------------------------------===## + +add_executable(openmp-offloading-server + ${LIBOMPTARGET_SRC_FILES} + ${GRPC_SRC_FILES} + ${RPC_SRC_DIR}/Utils.cpp + Server.cpp + OffloadingServer.cpp +) + +# Header search paths are attached to the server target only rather than to +# every target defined in this directory. +target_include_directories(openmp-offloading-server PRIVATE + ${LIBOMPTARGET_DEP_LIBELF_INCLUDE_DIRS} + ${LIBOMPTARGET_SRC_DIR} + ${LIBOMPTARGET_INCLUDE_DIR} + ${GRPC_INCLUDE_DIR} + ${RPC_INCLUDE_DIR} +) + +# Nothing links against the server executable, so all of its dependencies are +# PRIVATE. CMAKE_DL_LIBS names the platform's dynamic loading library. +target_link_libraries(openmp-offloading-server PRIVATE + grpc++ + protobuf + ${CMAKE_DL_LIBS} "-lomp" "-fopenmp" "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../../exports" ${LIBOMPTARGET_DEP_LIBELF_LIBRARIES}) diff --git a/openmp/libomptarget/plugins/remote/server/OffloadingServer.cpp b/openmp/libomptarget/plugins/remote/server/OffloadingServer.cpp new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/plugins/remote/server/OffloadingServer.cpp @@ -0,0 +1,52 @@ +//===------------- OffloadingServer.cpp - Server Application --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Offloading server for remote host. 
+// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include + +#include "Server.h" + +using grpc::Server; +using grpc::ServerBuilder; + +std::promise ShutdownPromise; + +int main() { + RPCConfig Config; + parseEnvironment(Config); + + RemoteOffloadImpl Service(Config.MaxSize, Config.BlockSize); + + ServerBuilder Builder; + Builder.AddListeningPort(Config.ServerAddresses[0], + grpc::InsecureServerCredentials()); + Builder.RegisterService(&Service); + Builder.SetMaxMessageSize(INT_MAX); + std::unique_ptr Server(Builder.BuildAndStart()); + if (getDebugLevel()) + std::cerr << "Server listening on " << Config.ServerAddresses[0] + << std::endl; + + auto WaitForServer = [&]() { Server->Wait(); }; + + std::thread ServerThread(WaitForServer); + + auto ShutdownFuture = ShutdownPromise.get_future(); + ShutdownFuture.wait(); + Server->Shutdown(); + ServerThread.join(); + + return 0; +} diff --git a/openmp/libomptarget/plugins/remote/server/Server.h b/openmp/libomptarget/plugins/remote/server/Server.h new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/plugins/remote/server/Server.h @@ -0,0 +1,114 @@ +//===-------------------------- Server.h - Server -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Offloading gRPC server for remote host. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OPENMP_LIBOMPTARGET_PLUGINS_REMOTE_SERVER_SERVER_H +#define LLVM_OPENMP_LIBOMPTARGET_PLUGINS_REMOTE_SERVER_SERVER_H + +#include + +#include "Utils.h" +#include "device.h" +#include "omptarget.h" +#include "openmp.grpc.pb.h" +#include "openmp.pb.h" +#include "rtl.h" + +using grpc::ServerContext; +using grpc::ServerReader; +using grpc::ServerWriter; +using grpc::Status; + +using namespace openmp::libomptarget::remote; +using namespace RemoteOffloading; + +using namespace google; + +extern PluginManager *PM; + +class RemoteOffloadImpl final : public RemoteOffload::Service { +private: + int32_t mapHostRTLDeviceId(int32_t RTLDeviceID); + + std::unordered_map + HostToRemoteDeviceImage; + std::unordered_map + HostToRemoteOffloadEntry; + std::unordered_map> + Descriptions; + __tgt_target_table *Table = nullptr; + + int DebugLevel; + uint64_t MaxSize; + uint64_t BlockSize; + std::unique_ptr Arena; + +public: + RemoteOffloadImpl(uint64_t MaxSize, uint64_t BlockSize) + : MaxSize(MaxSize), BlockSize(BlockSize) { + DebugLevel = getDebugLevel(); + Arena = std::make_unique(); + } + + Status Shutdown(ServerContext *Context, const Null *Request, + I32 *Reply) override; + + Status RegisterLib(ServerContext *Context, + const TargetBinaryDescription *Description, + I32 *Reply) override; + Status UnregisterLib(ServerContext *Context, const Pointer *Request, + I32 *Reply) override; + + Status IsValidBinary(ServerContext *Context, + const TargetDeviceImagePtr *Image, + I32 *IsValid) override; + Status GetNumberOfDevices(ServerContext *Context, const Null *Null, + I32 *NumberOfDevices) override; + + Status InitDevice(ServerContext *Context, const I32 *DeviceNum, + I32 *Reply) override; + Status InitRequires(ServerContext *Context, const I64 *RequiresFlag, + I32 *Reply) override; + + Status LoadBinary(ServerContext *Context, const Binary *Binary, + TargetTable *Reply) override; + Status 
Synchronize(ServerContext *Context, const SynchronizeDevice *Info, + I32 *Reply) override; + Status IsDataExchangeable(ServerContext *Context, const DevicePair *Request, + I32 *Reply) override; + + Status DataAlloc(ServerContext *Context, const AllocData *Request, + Pointer *Reply) override; + + Status DataSubmitAsync(ServerContext *Context, + ServerReader *Reader, + I32 *Reply) override; + Status DataRetrieveAsync(ServerContext *Context, + const RetrieveDataAsync *Request, + ServerWriter *Writer) override; + + Status DataExchangeAsync(ServerContext *Context, + const ExchangeDataAsync *Request, + I32 *Reply) override; + + Status DataDelete(ServerContext *Context, const DeleteData *Request, + I32 *Reply) override; + + Status RunTargetRegionAsync(ServerContext *Context, + const TargetRegionAsync *Request, + I32 *Reply) override; + + Status RunTargetTeamRegionAsync(ServerContext *Context, + const TargetTeamRegionAsync *Request, + I32 *Reply) override; +}; + +#endif diff --git a/openmp/libomptarget/plugins/remote/server/Server.cpp b/openmp/libomptarget/plugins/remote/server/Server.cpp new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/plugins/remote/server/Server.cpp @@ -0,0 +1,424 @@ +//===----------------- Server.cpp - Server Implementation -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Offloading gRPC server for remote host. 
+// +//===----------------------------------------------------------------------===// + +#include +#include + +#include "Server.h" +#include "omptarget.h" +#include "openmp.grpc.pb.h" +#include "openmp.pb.h" + +using grpc::WriteOptions; + +extern std::promise ShutdownPromise; + +Status RemoteOffloadImpl::Shutdown(ServerContext *Context, const Null *Request, + I32 *Reply) { + SERVER_DBG("Shutting down the server"); + + Reply->set_number(0); + ShutdownPromise.set_value(); + return Status::OK; +} + +Status +RemoteOffloadImpl::RegisterLib(ServerContext *Context, + const TargetBinaryDescription *Description, + I32 *Reply) { + SERVER_DBG("Registering library"); + + auto Desc = std::make_unique<__tgt_bin_desc>(); + + unloadTargetBinaryDescription(Description, Desc.get(), + HostToRemoteDeviceImage); + PM->RTLs.RegisterLib(Desc.get()); + + if (Descriptions.find((void *)Description->bin_ptr()) != Descriptions.end()) + freeTargetBinaryDescription( + Descriptions[(void *)Description->bin_ptr()].get()); + else + Descriptions[(void *)Description->bin_ptr()] = std::move(Desc); + + SERVER_DBG("Registered library"); + Reply->set_number(0); + return Status::OK; +} + +Status RemoteOffloadImpl::UnregisterLib(ServerContext *Context, + const Pointer *Request, I32 *Reply) { + SERVER_DBG("Unregistering library"); + + if (Descriptions.find((void *)Request->number()) == Descriptions.end()) { + Reply->set_number(1); + return Status::OK; + } + + PM->RTLs.UnregisterLib(Descriptions[(void *)Request->number()].get()); + freeTargetBinaryDescription(Descriptions[(void *)Request->number()].get()); + Descriptions.erase((void *)Request->number()); + + SERVER_DBG("Unregistered library"); + Reply->set_number(0); + return Status::OK; +} + +Status RemoteOffloadImpl::IsValidBinary(ServerContext *Context, + const TargetDeviceImagePtr *DeviceImage, + I32 *IsValid) { + SERVER_DBG("Checking if binary (%p) is valid", + (void *)(DeviceImage->image_ptr())); + + __tgt_device_image *Image = + 
HostToRemoteDeviceImage[(void *)DeviceImage->image_ptr()]; + + IsValid->set_number(0); + + for (auto &RTL : PM->RTLs.AllRTLs) + if (auto Ret = RTL.is_valid_binary(Image)) { + IsValid->set_number(Ret); + break; + } + + SERVER_DBG("Checked if binary (%p) is valid", + (void *)(DeviceImage->image_ptr())); + return Status::OK; +} + +Status RemoteOffloadImpl::GetNumberOfDevices(ServerContext *Context, + const Null *Null, + I32 *NumberOfDevices) { + SERVER_DBG("Getting number of devices"); + std::call_once(PM->RTLs.initFlag, &RTLsTy::LoadRTLs, &PM->RTLs); + + int32_t Devices = 0; + PM->RTLsMtx.lock(); + for (auto &RTL : PM->RTLs.AllRTLs) + Devices += RTL.NumberOfDevices; + PM->RTLsMtx.unlock(); + + NumberOfDevices->set_number(Devices); + + SERVER_DBG("Got number of devices"); + return Status::OK; +} + +Status RemoteOffloadImpl::InitDevice(ServerContext *Context, + const I32 *DeviceNum, I32 *Reply) { + SERVER_DBG("Initializing device %d", DeviceNum->number()); + + Reply->set_number(PM->Devices[DeviceNum->number()].RTL->init_device( + mapHostRTLDeviceId(DeviceNum->number()))); + + SERVER_DBG("Initialized device %d", DeviceNum->number()); + return Status::OK; +} + +Status RemoteOffloadImpl::InitRequires(ServerContext *Context, + const I64 *RequiresFlag, I32 *Reply) { + SERVER_DBG("Initializing requires for devices"); + + for (auto &Device : PM->Devices) + if (Device.RTL->init_requires) + Device.RTL->init_requires(RequiresFlag->number()); + Reply->set_number(RequiresFlag->number()); + + SERVER_DBG("Initialized requires for devices"); + return Status::OK; +} + +Status RemoteOffloadImpl::LoadBinary(ServerContext *Context, + const Binary *Binary, TargetTable *Reply) { + SERVER_DBG("Loading binary (%p) to device %d", (void *)Binary->image_ptr(), + Binary->device_id()); + + __tgt_device_image *Image = + HostToRemoteDeviceImage[(void *)Binary->image_ptr()]; + + Table = PM->Devices[Binary->device_id()].RTL->load_binary( + mapHostRTLDeviceId(Binary->device_id()), Image); + if (Table) 
+ loadTargetTable(Table, *Reply, Image); + + SERVER_DBG("Loaded binary (%p) to device %d", (void *)Binary->image_ptr(), + Binary->device_id()); + return Status::OK; +} + +Status RemoteOffloadImpl::Synchronize(ServerContext *Context, + const SynchronizeDevice *Info, + I32 *Reply) { + SERVER_DBG("Synchronizing device %d (probably won't work)", + Info->device_id()); + + void *AsyncInfoPtr = (void *)Info->queue_ptr(); + Reply->set_number(0); + if (PM->Devices[Info->device_id()].RTL->synchronize) + Reply->set_number(PM->Devices[Info->device_id()].synchronize( + (__tgt_async_info *)AsyncInfoPtr)); + + SERVER_DBG("Synchronized device %d", Info->device_id()); + return Status::OK; +} + +Status RemoteOffloadImpl::IsDataExchangeable(ServerContext *Context, + const DevicePair *Request, + I32 *Reply) { + SERVER_DBG("Checking if data exchangable between device %d and device %d", + Request->src_dev_id(), Request->dst_dev_id()); + + Reply->set_number(-1); + if (PM->Devices[mapHostRTLDeviceId(Request->src_dev_id())] + .RTL->is_data_exchangable) + Reply->set_number(PM->Devices[mapHostRTLDeviceId(Request->src_dev_id())] + .RTL->is_data_exchangable(Request->src_dev_id(), + Request->dst_dev_id())); + + SERVER_DBG("Checked if data exchangable between device %d and device %d", + Request->src_dev_id(), Request->dst_dev_id()); + return Status::OK; +} + +Status RemoteOffloadImpl::DataAlloc(ServerContext *Context, + const AllocData *Request, Pointer *Reply) { + SERVER_DBG("Allocating %ld bytes on sevice %d", Request->size(), + Request->device_id()); + + uint64_t TgtPtr = (uint64_t)PM->Devices[Request->device_id()].RTL->data_alloc( + mapHostRTLDeviceId(Request->device_id()), Request->size(), + (void *)Request->hst_ptr()); + Reply->set_number(TgtPtr); + + SERVER_DBG("Allocated at " DPxMOD "", DPxPTR((void *)TgtPtr)); + + return Status::OK; +} + +Status RemoteOffloadImpl::DataSubmitAsync(ServerContext *Context, + ServerReader *Reader, + I32 *Reply) { + SubmitDataAsync Request; + uint8_t 
*HostCopy = nullptr; + while (Reader->Read(&Request)) { + if (Request.start() == 0 && Request.size() == Request.data().size()) { + SERVER_DBG("Submitting %lu bytes async to (%p) on device %d", + Request.data().size(), (void *)Request.tgt_ptr(), + Request.device_id()); + + SERVER_DBG(" Host Pointer Info: %p, %p", (void *)Request.hst_ptr(), + static_cast(Request.data().data())); + + Reader->SendInitialMetadata(); + + Reply->set_number(PM->Devices[Request.device_id()].RTL->data_submit( + mapHostRTLDeviceId(Request.device_id()), (void *)Request.tgt_ptr(), + (void *)Request.data().data(), Request.data().size())); + + SERVER_DBG("Submitted %lu bytes async to (%p) on device %d", + Request.data().size(), (void *)Request.tgt_ptr(), + Request.device_id()); + + return Status::OK; + } + if (!HostCopy) { + HostCopy = new uint8_t[Request.size()]; + Reader->SendInitialMetadata(); + } + + SERVER_DBG("Submitting %lu-%lu/%lu bytes async to (%p) on device %d", + Request.start(), Request.start() + Request.data().size(), + Request.size(), (void *)Request.tgt_ptr(), Request.device_id()); + + memcpy((void *)((char *)HostCopy + Request.start()), Request.data().data(), + Request.data().size()); + } + SERVER_DBG(" Host Pointer Info: %p, %p", (void *)Request.hst_ptr(), + static_cast(Request.data().data())); + + Reply->set_number(PM->Devices[Request.device_id()].RTL->data_submit( + mapHostRTLDeviceId(Request.device_id()), (void *)Request.tgt_ptr(), + HostCopy, Request.size())); + + delete[] HostCopy; + + SERVER_DBG("Submitted %lu bytes to (%p) on device %d", Request.data().size(), + (void *)Request.tgt_ptr(), Request.device_id()); + + return Status::OK; +} + +Status RemoteOffloadImpl::DataRetrieveAsync(ServerContext *Context, + const RetrieveDataAsync *Request, + ServerWriter *Writer) { + auto HstPtr = std::make_unique(Request->size()); + auto Ret = PM->Devices[Request->device_id()].RTL->data_retrieve( + mapHostRTLDeviceId(Request->device_id()), HstPtr.get(), + (void *)Request->tgt_ptr(), 
Request->size()); + + if (Arena->SpaceAllocated() >= MaxSize) + Arena->Reset(); + + if (Request->size() > BlockSize) { + uint64_t Start = 0, End = BlockSize; + for (auto I = 0; I < ceil((float)Request->size() / BlockSize); I++) { + auto *Reply = protobuf::Arena::CreateMessage(Arena.get()); + + Reply->set_start(Start); + Reply->set_size(Request->size()); + Reply->set_data((char *)HstPtr.get() + Start, End - Start); + Reply->set_ret(Ret); + + SERVER_DBG("Retrieving %lu-%lu/%lu bytes from (%p) on device %d", Start, + End, Request->size(), (void *)Request->tgt_ptr(), + mapHostRTLDeviceId(Request->device_id())); + + if (!Writer->Write(*Reply)) { + CLIENT_DBG("Broken stream when submitting data"); + } + + SERVER_DBG("Retrieved %lu-%lu/%lu bytes from (%p) on device %d", Start, + End, Request->size(), (void *)Request->tgt_ptr(), + mapHostRTLDeviceId(Request->device_id())); + + Start += BlockSize; + End += BlockSize; + if (End >= Request->size()) + End = Request->size(); + } + } else { + auto *Reply = protobuf::Arena::CreateMessage(Arena.get()); + + SERVER_DBG("Retrieve %lu bytes from (%p) on device %d", Request->size(), + (void *)Request->tgt_ptr(), + mapHostRTLDeviceId(Request->device_id())); + + Reply->set_start(0); + Reply->set_size(Request->size()); + Reply->set_data((char *)HstPtr.get(), Request->size()); + Reply->set_ret(Ret); + + SERVER_DBG("Retrieved %lu bytes from (%p) on device %d", Request->size(), + (void *)Request->tgt_ptr(), + mapHostRTLDeviceId(Request->device_id())); + + Writer->WriteLast(*Reply, WriteOptions()); + } + + return Status::OK; +} + +Status RemoteOffloadImpl::DataExchangeAsync(ServerContext *Context, + const ExchangeDataAsync *Request, + I32 *Reply) { + SERVER_DBG( + "Exchanging data asynchronously from device %d (%p) to device %d (%p) of " + "size %lu", + mapHostRTLDeviceId(Request->src_dev_id()), (void *)Request->src_ptr(), + mapHostRTLDeviceId(Request->dst_dev_id()), (void *)Request->dst_ptr(), + Request->size()); + + if 
(PM->Devices[Request->src_dev_id()].RTL->data_exchange) { + int32_t Ret = PM->Devices[Request->src_dev_id()].RTL->data_exchange( + mapHostRTLDeviceId(Request->src_dev_id()), (void *)Request->src_ptr(), + mapHostRTLDeviceId(Request->dst_dev_id()), (void *)Request->dst_ptr(), + Request->size()); + Reply->set_number(Ret); + } else + Reply->set_number(-1); + + SERVER_DBG( + "Exchanged data asynchronously from device %d (%p) to device %d (%p) of " + "size %lu", + mapHostRTLDeviceId(Request->src_dev_id()), (void *)Request->src_ptr(), + mapHostRTLDeviceId(Request->dst_dev_id()), (void *)Request->dst_ptr(), + Request->size()); + return Status::OK; +} + +Status RemoteOffloadImpl::DataDelete(ServerContext *Context, + const DeleteData *Request, I32 *Reply) { + SERVER_DBG("Deleting data from (%p) on device %d", (void *)Request->tgt_ptr(), + mapHostRTLDeviceId(Request->device_id())); + + auto Ret = PM->Devices[Request->device_id()].RTL->data_delete( + mapHostRTLDeviceId(Request->device_id()), (void *)Request->tgt_ptr()); + Reply->set_number(Ret); + + SERVER_DBG("Deleted data from (%p) on device %d", (void *)Request->tgt_ptr(), + mapHostRTLDeviceId(Request->device_id())); + return Status::OK; +} + +Status RemoteOffloadImpl::RunTargetRegionAsync(ServerContext *Context, + const TargetRegionAsync *Request, + I32 *Reply) { + SERVER_DBG("Running TargetRegionAsync on device %d with %d args", + mapHostRTLDeviceId(Request->device_id()), Request->arg_num()); + + std::vector TgtArgs(Request->arg_num()); + for (auto I = 0; I < Request->arg_num(); I++) + TgtArgs[I] = (uint64_t)Request->tgt_args()[I]; + + std::vector TgtOffsets(Request->arg_num()); + const auto *TgtOffsetItr = Request->tgt_offsets().begin(); + for (auto I = 0; I < Request->arg_num(); I++, TgtOffsetItr++) + TgtOffsets[I] = (ptrdiff_t)*TgtOffsetItr; + + void *TgtEntryPtr = ((__tgt_offload_entry *)Request->tgt_entry_ptr())->addr; + + int32_t Ret = PM->Devices[Request->device_id()].RTL->run_region( + 
mapHostRTLDeviceId(Request->device_id()), TgtEntryPtr, + (void **)TgtArgs.data(), TgtOffsets.data(), Request->arg_num()); + + Reply->set_number(Ret); + + SERVER_DBG("Ran TargetRegionAsync on device %d with %d args", + mapHostRTLDeviceId(Request->device_id()), Request->arg_num()); + return Status::OK; +} + +Status RemoteOffloadImpl::RunTargetTeamRegionAsync( + ServerContext *Context, const TargetTeamRegionAsync *Request, I32 *Reply) { + SERVER_DBG("Running TargetTeamRegionAsync on device %d with %d args", + mapHostRTLDeviceId(Request->device_id()), Request->arg_num()); + + std::vector TgtArgs(Request->arg_num()); + for (auto I = 0; I < Request->arg_num(); I++) + TgtArgs[I] = (uint64_t)Request->tgt_args()[I]; + + std::vector TgtOffsets(Request->arg_num()); + const auto *TgtOffsetItr = Request->tgt_offsets().begin(); + for (auto I = 0; I < Request->arg_num(); I++, TgtOffsetItr++) + TgtOffsets[I] = (ptrdiff_t)*TgtOffsetItr; + + void *TgtEntryPtr = ((__tgt_offload_entry *)Request->tgt_entry_ptr())->addr; + int32_t Ret = PM->Devices[Request->device_id()].RTL->run_team_region( + mapHostRTLDeviceId(Request->device_id()), TgtEntryPtr, + (void **)TgtArgs.data(), TgtOffsets.data(), Request->arg_num(), + Request->team_num(), Request->thread_limit(), Request->loop_tripcount()); + + Reply->set_number(Ret); + + SERVER_DBG("Ran TargetTeamRegionAsync on device %d with %d args", + mapHostRTLDeviceId(Request->device_id()), Request->arg_num()); + return Status::OK; +} + +int32_t RemoteOffloadImpl::mapHostRTLDeviceId(int32_t RTLDeviceID) { + for (auto &RTL : PM->RTLs.UsedRTLs) { + if (RTLDeviceID - RTL->NumberOfDevices >= 0) + RTLDeviceID -= RTL->NumberOfDevices; + else + break; + } + return RTLDeviceID; +} diff --git a/openmp/libomptarget/plugins/remote/src/CMakeLists.txt b/openmp/libomptarget/plugins/remote/src/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/plugins/remote/src/CMakeLists.txt @@ -0,0 +1,41 @@ 
##===----------------------------------------------------------------------===##
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
##===----------------------------------------------------------------------===##
#
# Build a plugin for remote offloading.
#
##===----------------------------------------------------------------------===##

cmake_minimum_required(VERSION 3.15)

add_library(omptarget.rtl.rpc SHARED
  ${LIBOMPTARGET_SRC_FILES}
  ${GRPC_SRC_FILES}
  ${RPC_SRC_DIR}/Utils.cpp
  Client.cpp
  rtl.cpp
)

# Define the suffix for the runtime messaging dumps. Scoped to this target so
# the definition does not leak into anything else added from this directory.
target_compile_definitions(omptarget.rtl.rpc PRIVATE TARGET_NAME=RPC)

target_include_directories(omptarget.rtl.rpc PRIVATE
  ${LIBOMPTARGET_SRC_DIR}
  ${LIBOMPTARGET_INCLUDE_DIR}
  ${GRPC_INCLUDE_DIR}
  ${RPC_INCLUDE_DIR}
)

# Install plugin under the lib destination folder.
install(TARGETS omptarget.rtl.rpc LIBRARY DESTINATION "${OPENMP_INSTALL_LIBDIR}")

# CMAKE_DL_LIBS names the platform's dlopen library (empty where it is part
# of libc) instead of hard-coding -ldl.
target_link_libraries(omptarget.rtl.rpc PRIVATE
  grpc++
  protobuf
  ${CMAKE_DL_LIBS}
)

# Restrict the exported symbols to the plugin interface listed in `exports`.
target_link_options(omptarget.rtl.rpc PRIVATE
  "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../../exports")

# Report to the parent scope that we are building a plugin for RPC.
set(LIBOMPTARGET_SYSTEM_TARGETS "${LIBOMPTARGET_SYSTEM_TARGETS} rpc" PARENT_SCOPE)
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// gRPC Client for the remote plugin. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OPENMP_LIBOMPTARGET_PLUGINS_REMOTE_SRC_CLIENT_H +#define LLVM_OPENMP_LIBOMPTARGET_PLUGINS_REMOTE_SRC_CLIENT_H + +#include "Utils.h" +#include "omptarget.h" +#include +#include +#include +#include +#include +#include +#include + +using grpc::Channel; +using openmp::libomptarget::remote::RemoteOffload; +using namespace RemoteOffloading; + +using namespace google; + +class RemoteOffloadClient { + const int Timeout; + + int DebugLevel; + uint64_t MaxSize; + int64_t BlockSize; + + std::unique_ptr Stub; + std::unique_ptr Arena; + + std::unique_ptr ArenaAllocatorLock; + + std::map> RemoteEntries; + std::map> DevicesToTables; + + template + auto remoteCall(Fn1 Preprocess, Fn2 Postprocess, TReturn ErrorValue, + bool Timeout = true); + +public: + RemoteOffloadClient(std::shared_ptr Channel, int Timeout, + uint64_t MaxSize, int64_t BlockSize) + : Timeout(Timeout), MaxSize(MaxSize), BlockSize(BlockSize), + Stub(RemoteOffload::NewStub(Channel)) { + DebugLevel = getDebugLevel(); + Arena = std::make_unique(); + ArenaAllocatorLock = std::make_unique(); + } + + RemoteOffloadClient(RemoteOffloadClient &&C) = default; + + ~RemoteOffloadClient() { + for (auto &TableIt : DevicesToTables) + freeTargetTable(TableIt.second.get()); + } + + int32_t shutdown(void); + + int32_t registerLib(__tgt_bin_desc *Desc); + int32_t unregisterLib(__tgt_bin_desc *Desc); + + int32_t isValidBinary(__tgt_device_image *Image); + int32_t getNumberOfDevices(); + + int32_t initDevice(int32_t DeviceId); + int32_t initRequires(int64_t RequiresFlags); + + __tgt_target_table *loadBinary(int32_t DeviceId, __tgt_device_image *Image); + int64_t synchronize(int32_t DeviceId, __tgt_async_info *AsyncInfoPtr); + int32_t 
isDataExchangeable(int32_t SrcDevId, int32_t DstDevId); + + void *dataAlloc(int32_t DeviceId, int64_t Size, void *HstPtr); + int32_t dataDelete(int32_t DeviceId, void *TgtPtr); + + int32_t dataSubmitAsync(int32_t DeviceId, void *TgtPtr, void *HstPtr, + int64_t Size, __tgt_async_info *AsyncInfoPtr); + int32_t dataRetrieveAsync(int32_t DeviceId, void *HstPtr, void *TgtPtr, + int64_t Size, __tgt_async_info *AsyncInfoPtr); + + int32_t dataExchangeAsync(int32_t SrcDevId, void *SrcPtr, int32_t DstDevId, + void *DstPtr, int64_t Size, + __tgt_async_info *AsyncInfoPtr); + + int32_t runTargetRegionAsync(int32_t DeviceId, void *TgtEntryPtr, + void **TgtArgs, ptrdiff_t *TgtOffsets, + int32_t ArgNum, __tgt_async_info *AsyncInfoPtr); + + int32_t runTargetTeamRegionAsync(int32_t DeviceId, void *TgtEntryPtr, + void **TgtArgs, ptrdiff_t *TgtOffsets, + int32_t ArgNum, int32_t TeamNum, + int32_t ThreadLimit, uint64_t LoopTripCount, + __tgt_async_info *AsyncInfoPtr); +}; + +class RemoteClientManager { +private: + std::vector Addresses; + std::vector Clients; + std::vector Devices; + + std::pair mapDeviceId(int32_t DeviceId); + int DebugLevel; + +public: + RemoteClientManager(std::vector Addresses, int Timeout, + uint64_t MaxSize, int64_t BlockSize) + : Addresses(Addresses) { + grpc::ChannelArguments ChArgs; + ChArgs.SetMaxReceiveMessageSize(-1); + DebugLevel = getDebugLevel(); + for (auto Address : Addresses) { + Clients.push_back(RemoteOffloadClient( + grpc::CreateChannel(Address, grpc::InsecureChannelCredentials()), + Timeout, MaxSize, BlockSize)); + } + } + + int32_t shutdown(void); + + int32_t registerLib(__tgt_bin_desc *Desc); + int32_t unregisterLib(__tgt_bin_desc *Desc); + + int32_t isValidBinary(__tgt_device_image *Image); + int32_t getNumberOfDevices(); + + int32_t initDevice(int32_t DeviceId); + int32_t initRequires(int64_t RequiresFlags); + + __tgt_target_table *loadBinary(int32_t DeviceId, __tgt_device_image *Image); + int64_t synchronize(int32_t DeviceId, __tgt_async_info 
*AsyncInfoPtr); + int32_t isDataExchangeable(int32_t SrcDevId, int32_t DstDevId); + + void *dataAlloc(int32_t DeviceId, int64_t Size, void *HstPtr); + int32_t dataDelete(int32_t DeviceId, void *TgtPtr); + + int32_t dataSubmitAsync(int32_t DeviceId, void *TgtPtr, void *HstPtr, + int64_t Size, __tgt_async_info *AsyncInfoPtr); + int32_t dataRetrieveAsync(int32_t DeviceId, void *HstPtr, void *TgtPtr, + int64_t Size, __tgt_async_info *AsyncInfoPtr); + + int32_t dataExchangeAsync(int32_t SrcDevId, void *SrcPtr, int32_t DstDevId, + void *DstPtr, int64_t Size, + __tgt_async_info *AsyncInfoPtr); + + int32_t runTargetRegionAsync(int32_t DeviceId, void *TgtEntryPtr, + void **TgtArgs, ptrdiff_t *TgtOffsets, + int32_t ArgNum, __tgt_async_info *AsyncInfoPtr); + + int32_t runTargetTeamRegionAsync(int32_t DeviceId, void *TgtEntryPtr, + void **TgtArgs, ptrdiff_t *TgtOffsets, + int32_t ArgNum, int32_t TeamNum, + int32_t ThreadLimit, uint64_t LoopTripCount, + __tgt_async_info *AsyncInfoPtr); +}; + +#endif diff --git a/openmp/libomptarget/plugins/remote/src/Client.cpp b/openmp/libomptarget/plugins/remote/src/Client.cpp new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/plugins/remote/src/Client.cpp @@ -0,0 +1,789 @@ +//===----------------- Client.cpp - Client Implementation -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// gRPC (Client) for the remote plugin. 
+// +//===----------------------------------------------------------------------===// + +#include + +#include "Client.h" +#include "omptarget.h" +#include "openmp.pb.h" + +using namespace std::chrono; + +using grpc::ClientContext; +using grpc::ClientReader; +using grpc::ClientWriter; +using grpc::Status; + +template +auto RemoteOffloadClient::remoteCall(Fn1 Preprocess, Fn2 Postprocess, + TReturn ErrorValue, bool Timeout) { + ArenaAllocatorLock->lock(); + if (Arena->SpaceAllocated() >= MaxSize) + Arena->Reset(); + ArenaAllocatorLock->unlock(); + + ClientContext Context; + if (Timeout) { + auto Deadline = + std::chrono::system_clock::now() + std::chrono::seconds(Timeout); + Context.set_deadline(Deadline); + } + + Status RPCStatus; + auto Reply = Preprocess(RPCStatus, Context); + + // TODO: Error handle more appropriately + if (!RPCStatus.ok()) { + CLIENT_DBG("%s", RPCStatus.error_message().c_str()); + } else { + return Postprocess(Reply); + } + + CLIENT_DBG("Failed"); + return ErrorValue; +} + +int32_t RemoteOffloadClient::shutdown(void) { + ClientContext Context; + Null Request; + I32 Reply; + CLIENT_DBG("Shutting down server."); + auto Status = Stub->Shutdown(&Context, Request, &Reply); + if (Status.ok()) + return Reply.number(); + return 1; +} + +int32_t RemoteOffloadClient::registerLib(__tgt_bin_desc *Desc) { + return remoteCall( + /* Preprocess */ + [&](auto &RPCStatus, auto &Context) { + auto *Request = protobuf::Arena::CreateMessage( + Arena.get()); + auto *Reply = protobuf::Arena::CreateMessage(Arena.get()); + loadTargetBinaryDescription(Desc, *Request); + Request->set_bin_ptr((uint64_t)Desc); + + CLIENT_DBG("Registering library"); + RPCStatus = Stub->RegisterLib(&Context, *Request, Reply); + return Reply; + }, + /* Postprocess */ + [&](const auto &Reply) { + if (Reply->number() == 0) { + CLIENT_DBG("Registered library"); + return 0; + } + return 1; + }, + /* Error Value */ 1); +} + +int32_t RemoteOffloadClient::unregisterLib(__tgt_bin_desc *Desc) { + return 
remoteCall( + /* Preprocess */ + [&](auto &RPCStatus, auto &Context) { + auto *Request = protobuf::Arena::CreateMessage(Arena.get()); + auto *Reply = protobuf::Arena::CreateMessage(Arena.get()); + + Request->set_number((uint64_t)Desc); + + CLIENT_DBG("Unregistering library"); + RPCStatus = Stub->UnregisterLib(&Context, *Request, Reply); + return Reply; + }, + /* Postprocess */ + [&](const auto &Reply) { + if (Reply->number() == 0) { + CLIENT_DBG("Unregistered library"); + return 0; + } + CLIENT_DBG("Failed to unregister library"); + return 1; + }, + /* Error Value */ 1); +} + +int32_t RemoteOffloadClient::isValidBinary(__tgt_device_image *Image) { + return remoteCall( + /* Preprocess */ + [&](auto &RPCStatus, auto &Context) { + auto *Request = + protobuf::Arena::CreateMessage(Arena.get()); + auto *Reply = protobuf::Arena::CreateMessage(Arena.get()); + + Request->set_image_ptr((uint64_t)Image->ImageStart); + + auto *EntryItr = Image->EntriesBegin; + while (EntryItr != Image->EntriesEnd) + Request->add_entry_ptrs((uint64_t)EntryItr++); + + CLIENT_DBG("Validating binary"); + RPCStatus = Stub->IsValidBinary(&Context, *Request, Reply); + return Reply; + }, + /* Postprocess */ + [&](const auto &Reply) { + if (Reply->number()) { + CLIENT_DBG("Validated binary"); + } else { + CLIENT_DBG("Could not validate binary"); + } + return Reply->number(); + }, + /* Error Value */ 0); +} + +int32_t RemoteOffloadClient::getNumberOfDevices() { + return remoteCall( + /* Preprocess */ + [&](Status &RPCStatus, ClientContext &Context) { + auto *Request = protobuf::Arena::CreateMessage(Arena.get()); + auto *Reply = protobuf::Arena::CreateMessage(Arena.get()); + + CLIENT_DBG("Getting number of devices"); + RPCStatus = Stub->GetNumberOfDevices(&Context, *Request, Reply); + + return Reply; + }, + /* Postprocess */ + [&](const auto &Reply) { + if (Reply->number()) { + CLIENT_DBG("Found %d devices", Reply->number()); + } else { + CLIENT_DBG("Could not get the number of devices"); + } + return 
Reply->number(); + }, + /*Error Value*/ -1); +} + +int32_t RemoteOffloadClient::initDevice(int32_t DeviceId) { + return remoteCall( + /* Preprocess */ + [&](auto &RPCStatus, auto &Context) { + auto *Request = protobuf::Arena::CreateMessage(Arena.get()); + auto *Reply = protobuf::Arena::CreateMessage(Arena.get()); + + Request->set_number(DeviceId); + + CLIENT_DBG("Initializing device %d", DeviceId); + RPCStatus = Stub->InitDevice(&Context, *Request, Reply); + + return Reply; + }, + /* Postprocess */ + [&](const auto &Reply) { + if (!Reply->number()) { + CLIENT_DBG("Initialized device %d", DeviceId); + } else { + CLIENT_DBG("Could not initialize device %d", DeviceId); + } + return Reply->number(); + }, + /* Error Value */ -1); +} + +int32_t RemoteOffloadClient::initRequires(int64_t RequiresFlags) { + return remoteCall( + /* Preprocess */ + [&](auto &RPCStatus, auto &Context) { + auto *Request = protobuf::Arena::CreateMessage(Arena.get()); + auto *Reply = protobuf::Arena::CreateMessage(Arena.get()); + Request->set_number(RequiresFlags); + CLIENT_DBG("Initializing requires"); + RPCStatus = Stub->InitRequires(&Context, *Request, Reply); + return Reply; + }, + /* Postprocess */ + [&](const auto &Reply) { + if (Reply->number()) { + CLIENT_DBG("Initialized requires"); + } else { + CLIENT_DBG("Could not initialize requires"); + } + return Reply->number(); + }, + /* Error Value */ -1); +} + +__tgt_target_table *RemoteOffloadClient::loadBinary(int32_t DeviceId, + __tgt_device_image *Image) { + return remoteCall( + /* Preprocess */ + [&](auto &RPCStatus, auto &Context) { + auto *ImageMessage = + protobuf::Arena::CreateMessage(Arena.get()); + auto *Reply = protobuf::Arena::CreateMessage(Arena.get()); + ImageMessage->set_image_ptr((uint64_t)Image->ImageStart); + ImageMessage->set_device_id(DeviceId); + + CLIENT_DBG("Loading Image %p to device %d", Image, DeviceId); + RPCStatus = Stub->LoadBinary(&Context, *ImageMessage, Reply); + return Reply; + }, + /* Postprocess */ + [&](auto 
&Reply) { + if (Reply->entries_size() == 0) { + CLIENT_DBG("Could not load image %p onto device %d", Image, DeviceId); + return (__tgt_target_table *)nullptr; + } + DevicesToTables[DeviceId] = std::make_unique<__tgt_target_table>(); + unloadTargetTable(*Reply, DevicesToTables[DeviceId].get(), + RemoteEntries[DeviceId]); + + CLIENT_DBG("Loaded Image %p to device %d with %d entries", Image, + DeviceId, Reply->entries_size()); + + return DevicesToTables[DeviceId].get(); + }, + /* Error Value */ (__tgt_target_table *)nullptr, + /* Timeout */ false); +} + +int64_t RemoteOffloadClient::synchronize(int32_t DeviceId, + __tgt_async_info *AsyncInfoPtr) { + return remoteCall( + /* Preprocess */ + [&](auto &RPCStatus, auto &Context) { + auto *Reply = protobuf::Arena::CreateMessage(Arena.get()); + auto *Info = + protobuf::Arena::CreateMessage(Arena.get()); + + Info->set_device_id(DeviceId); + Info->set_queue_ptr((uint64_t)AsyncInfoPtr); + + CLIENT_DBG("Synchronizing device %d", DeviceId); + RPCStatus = Stub->Synchronize(&Context, *Info, Reply); + return Reply; + }, + /* Postprocess */ + [&](auto &Reply) { + if (Reply->number()) { + CLIENT_DBG("Synchronized device %d", DeviceId); + } else { + CLIENT_DBG("Could not synchronize device %d", DeviceId); + } + return Reply->number(); + }, + /* Error Value */ -1); +} + +int32_t RemoteOffloadClient::isDataExchangeable(int32_t SrcDevId, + int32_t DstDevId) { + return remoteCall( + /* Preprocess */ + [&](auto &RPCStatus, auto &Context) { + auto *Request = protobuf::Arena::CreateMessage(Arena.get()); + auto *Reply = protobuf::Arena::CreateMessage(Arena.get()); + + Request->set_src_dev_id(SrcDevId); + Request->set_dst_dev_id(DstDevId); + + CLIENT_DBG("Asking if data is exchangeable between %d, %d", SrcDevId, + DstDevId); + RPCStatus = Stub->IsDataExchangeable(&Context, *Request, Reply); + return Reply; + }, + /* Postprocess */ + [&](auto &Reply) { + if (Reply->number()) { + CLIENT_DBG("Data is exchangeable between %d, %d", SrcDevId, 
DstDevId); + } else { + CLIENT_DBG("Data is not exchangeable between %d, %d", SrcDevId, + DstDevId); + } + return Reply->number(); + }, + /* Error Value */ -1); +} + +void *RemoteOffloadClient::dataAlloc(int32_t DeviceId, int64_t Size, + void *HstPtr) { + return remoteCall( + /* Preprocess */ + [&](auto &RPCStatus, auto &Context) { + auto *Reply = protobuf::Arena::CreateMessage(Arena.get()); + auto *Request = protobuf::Arena::CreateMessage(Arena.get()); + + Request->set_device_id(DeviceId); + Request->set_size(Size); + Request->set_hst_ptr((uint64_t)HstPtr); + + CLIENT_DBG("Allocating %ld bytes on device %d", Size, DeviceId); + RPCStatus = Stub->DataAlloc(&Context, *Request, Reply); + return Reply; + }, + /* Postprocess */ + [&](auto &Reply) { + if (Reply->number()) { + CLIENT_DBG("Allocated %ld bytes on device %d at %p", Size, DeviceId, + (void *)Reply->number()); + } else { + CLIENT_DBG("Could not allocate %ld bytes on device %d at %p", Size, + DeviceId, (void *)Reply->number()); + } + return (void *)Reply->number(); + }, + /* Error Value */ (void *)nullptr); +} + +int32_t RemoteOffloadClient::dataSubmitAsync(int32_t DeviceId, void *TgtPtr, + void *HstPtr, int64_t Size, + __tgt_async_info *AsyncInfoPtr) { + + return remoteCall( + /* Preprocess */ + [&](auto &RPCStatus, auto &Context) { + auto *Reply = protobuf::Arena::CreateMessage(Arena.get()); + std::unique_ptr> Writer( + Stub->DataSubmitAsync(&Context, Reply)); + + if (Size > BlockSize) { + int64_t Start = 0, End = BlockSize; + for (auto I = 0; I < ceil((float)Size / BlockSize); I++) { + auto *Request = + protobuf::Arena::CreateMessage(Arena.get()); + + Request->set_device_id(DeviceId); + Request->set_data((char *)HstPtr + Start, End - Start); + Request->set_hst_ptr((uint64_t)HstPtr); + Request->set_tgt_ptr((uint64_t)TgtPtr); + Request->set_start(Start); + Request->set_size(Size); + Request->set_queue_ptr((uint64_t)AsyncInfoPtr); + + CLIENT_DBG("Submitting %ld-%ld/%ld bytes async on device %d at %p", + Start, 
End, Size, DeviceId, TgtPtr) + + if (!Writer->Write(*Request)) { + CLIENT_DBG("Broken stream when submitting data"); + Reply->set_number(0); + return Reply; + } + + Start += BlockSize; + End += BlockSize; + if (End >= Size) + End = Size; + } + } else { + auto *Request = + protobuf::Arena::CreateMessage(Arena.get()); + + Request->set_device_id(DeviceId); + Request->set_data(HstPtr, Size); + Request->set_hst_ptr((uint64_t)HstPtr); + Request->set_tgt_ptr((uint64_t)TgtPtr); + Request->set_start(0); + Request->set_size(Size); + + CLIENT_DBG("Submitting %ld bytes async on device %d at %p", Size, + DeviceId, TgtPtr) + if (!Writer->Write(*Request)) { + CLIENT_DBG("Broken stream when submitting data"); + Reply->set_number(0); + return Reply; + } + } + + Writer->WritesDone(); + RPCStatus = Writer->Finish(); + + return Reply; + }, + /* Postprocess */ + [&](auto &Reply) { + if (!Reply->number()) { + CLIENT_DBG("Async submitted %ld bytes on device %d at %p", Size, + DeviceId, TgtPtr) + } else { + CLIENT_DBG("Could not async submit %ld bytes on device %d at %p", + Size, DeviceId, TgtPtr) + } + return Reply->number(); + }, + /* Error Value */ -1, + /* Timeout */ false); +} + +int32_t RemoteOffloadClient::dataRetrieveAsync(int32_t DeviceId, void *HstPtr, + void *TgtPtr, int64_t Size, + __tgt_async_info *AsyncInfoPtr) { + return remoteCall( + /* Preprocess */ + [&](auto &RPCStatus, auto &Context) { + auto *Request = + protobuf::Arena::CreateMessage(Arena.get()); + + Request->set_device_id(DeviceId); + Request->set_size(Size); + Request->set_hst_ptr((int64_t)HstPtr); + Request->set_tgt_ptr((int64_t)TgtPtr); + Request->set_queue_ptr((uint64_t)AsyncInfoPtr); + + auto *Reply = protobuf::Arena::CreateMessage(Arena.get()); + std::unique_ptr> Reader( + Stub->DataRetrieveAsync(&Context, *Request)); + Reader->WaitForInitialMetadata(); + while (Reader->Read(Reply)) { + if (Reply->ret()) { + CLIENT_DBG("Could not async retrieve %ld bytes on device %d at %p " + "for %p", + Size, DeviceId, 
TgtPtr, HstPtr) + return Reply; + } + + if (Reply->start() == 0 && Reply->size() == Reply->data().size()) { + CLIENT_DBG("Async retrieving %ld bytes on device %d at %p for %p", + Size, DeviceId, TgtPtr, HstPtr) + + memcpy(HstPtr, Reply->data().data(), Reply->data().size()); + + return Reply; + } + CLIENT_DBG("Retrieving %lu-%lu/%lu bytes async from (%p) to (%p) " + "on Device %d", + Reply->start(), Reply->start() + Reply->data().size(), + Reply->size(), (void *)Request->tgt_ptr(), HstPtr, + Request->device_id()); + + memcpy((void *)((char *)HstPtr + Reply->start()), + Reply->data().data(), Reply->data().size()); + } + RPCStatus = Reader->Finish(); + + return Reply; + }, + /* Postprocess */ + [&](auto &Reply) { + if (!Reply->ret()) { + CLIENT_DBG("Async retrieve %ld bytes on Device %d", Size, DeviceId); + } else { + CLIENT_DBG("Could not async retrieve %ld bytes on Device %d", Size, + DeviceId); + } + return Reply->ret(); + }, + /* Error Value */ -1, + /* Timeout */ false); +} + +int32_t RemoteOffloadClient::dataExchangeAsync(int32_t SrcDevId, void *SrcPtr, + int32_t DstDevId, void *DstPtr, + int64_t Size, + __tgt_async_info *AsyncInfoPtr) { + return remoteCall( + /* Preprocess */ + [&](auto &RPCStatus, auto &Context) { + auto *Reply = protobuf::Arena::CreateMessage(Arena.get()); + auto *Request = + protobuf::Arena::CreateMessage(Arena.get()); + + Request->set_src_dev_id(SrcDevId); + Request->set_src_ptr((uint64_t)SrcPtr); + Request->set_dst_dev_id(DstDevId); + Request->set_dst_ptr((uint64_t)DstPtr); + Request->set_size(Size); + Request->set_queue_ptr((uint64_t)AsyncInfoPtr); + + CLIENT_DBG( + "Exchanging %ld bytes on device %d at %p for %p on device %d", Size, + SrcDevId, SrcPtr, DstPtr, DstDevId); + RPCStatus = Stub->DataExchangeAsync(&Context, *Request, Reply); + return Reply; + }, + /* Postprocess */ + [&](auto &Reply) { + if (Reply->number()) { + CLIENT_DBG( + "Exchanged %ld bytes on device %d at %p for %p on device %d", + Size, SrcDevId, SrcPtr, DstPtr, 
DstDevId); + } else { + CLIENT_DBG("Could not exchange %ld bytes on device %d at %p for %p " + "on device %d", + Size, SrcDevId, SrcPtr, DstPtr, DstDevId); + } + return Reply->number(); + }, + /* Error Value */ -1); +} + +int32_t RemoteOffloadClient::dataDelete(int32_t DeviceId, void *TgtPtr) { + return remoteCall( + /* Preprocess */ + [&](auto &RPCStatus, auto &Context) { + auto *Reply = protobuf::Arena::CreateMessage(Arena.get()); + auto *Request = protobuf::Arena::CreateMessage(Arena.get()); + + Request->set_device_id(DeviceId); + Request->set_tgt_ptr((uint64_t)TgtPtr); + + CLIENT_DBG("Deleting data at %p on device %d", TgtPtr, DeviceId) + RPCStatus = Stub->DataDelete(&Context, *Request, Reply); + return Reply; + }, + /* Postprocess */ + [&](auto &Reply) { + if (!Reply->number()) { + CLIENT_DBG("Deleted data at %p on device %d", TgtPtr, DeviceId) + } else { + CLIENT_DBG("Could not delete data at %p on device %d", TgtPtr, + DeviceId) + } + return Reply->number(); + }, + /* Error Value */ -1); +} + +int32_t RemoteOffloadClient::runTargetRegionAsync( + int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets, + int32_t ArgNum, __tgt_async_info *AsyncInfoPtr) { + return remoteCall( + /* Preprocess */ + [&](auto &RPCStatus, auto &Context) { + auto *Reply = protobuf::Arena::CreateMessage(Arena.get()); + auto *Request = + protobuf::Arena::CreateMessage(Arena.get()); + + Request->set_device_id(DeviceId); + Request->set_queue_ptr((uint64_t)AsyncInfoPtr); + + Request->set_tgt_entry_ptr( + (uint64_t)RemoteEntries[DeviceId][TgtEntryPtr]); + + char **ArgPtr = (char **)TgtArgs; + for (auto I = 0; I < ArgNum; I++, ArgPtr++) + Request->add_tgt_args((uint64_t)*ArgPtr); + + char *OffsetPtr = (char *)TgtOffsets; + for (auto I = 0; I < ArgNum; I++, OffsetPtr++) + Request->add_tgt_offsets((uint64_t)*OffsetPtr); + + Request->set_arg_num(ArgNum); + + CLIENT_DBG("Running target region async on device %d", DeviceId); + RPCStatus = Stub->RunTargetRegionAsync(&Context, 
*Request, Reply); + return Reply; + }, + /* Postprocess */ + [&](auto &Reply) { + if (!Reply->number()) { + CLIENT_DBG("Ran target region async on device %d", DeviceId); + } else { + CLIENT_DBG("Could not run target region async on device %d", + DeviceId); + } + return Reply->number(); + }, + /* Error Value */ -1, + /* Timeout */ false); +} + +int32_t RemoteOffloadClient::runTargetTeamRegionAsync( + int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets, + int32_t ArgNum, int32_t TeamNum, int32_t ThreadLimit, + uint64_t LoopTripcount, __tgt_async_info *AsyncInfoPtr) { + return remoteCall( + /* Preprocess */ + [&](auto &RPCStatus, auto &Context) { + auto *Reply = protobuf::Arena::CreateMessage(Arena.get()); + auto *Request = + protobuf::Arena::CreateMessage(Arena.get()); + + Request->set_device_id(DeviceId); + Request->set_queue_ptr((uint64_t)AsyncInfoPtr); + + Request->set_tgt_entry_ptr( + (uint64_t)RemoteEntries[DeviceId][TgtEntryPtr]); + + char **ArgPtr = (char **)TgtArgs; + for (auto I = 0; I < ArgNum; I++, ArgPtr++) { + Request->add_tgt_args((uint64_t)*ArgPtr); + } + + char *OffsetPtr = (char *)TgtOffsets; + for (auto I = 0; I < ArgNum; I++, OffsetPtr++) + Request->add_tgt_offsets((uint64_t)*OffsetPtr); + + Request->set_arg_num(ArgNum); + Request->set_team_num(TeamNum); + Request->set_thread_limit(ThreadLimit); + Request->set_loop_tripcount(LoopTripcount); + + CLIENT_DBG("Running target team region async on device %d", DeviceId); + RPCStatus = Stub->RunTargetTeamRegionAsync(&Context, *Request, Reply); + return Reply; + }, + /* Postprocess */ + [&](auto &Reply) { + if (!Reply->number()) { + CLIENT_DBG("Ran target team region async on device %d", DeviceId); + } else { + CLIENT_DBG("Could not run target team region async on device %d", + DeviceId); + } + return Reply->number(); + }, + /* Error Value */ -1, + /* Timeout */ false); +} + +// TODO: Better error handling for the next three functions +int32_t RemoteClientManager::shutdown(void) { + 
int32_t Ret = 0; + for (auto &Client : Clients) + Ret &= Client.shutdown(); + return Ret; +} + +int32_t RemoteClientManager::registerLib(__tgt_bin_desc *Desc) { + int32_t Ret = 0; + for (auto &Client : Clients) + Ret &= Client.registerLib(Desc); + return Ret; +} + +int32_t RemoteClientManager::unregisterLib(__tgt_bin_desc *Desc) { + int32_t Ret = 0; + for (auto &Client : Clients) + Ret &= Client.unregisterLib(Desc); + return Ret; +} + +int32_t RemoteClientManager::isValidBinary(__tgt_device_image *Image) { + int32_t ClientIdx = 0; + for (auto &Client : Clients) { + if (auto Ret = Client.isValidBinary(Image)) + return Ret; + ClientIdx++; + } + return 0; +} + +int32_t RemoteClientManager::getNumberOfDevices() { + auto ClientIdx = 0; + for (auto &Client : Clients) { + if (auto NumDevices = Client.getNumberOfDevices()) { + Devices.push_back(NumDevices); + } + ClientIdx++; + } + + return std::accumulate(Devices.begin(), Devices.end(), 0); +} + +std::pair RemoteClientManager::mapDeviceId(int32_t DeviceId) { + for (size_t ClientIdx = 0; ClientIdx < Devices.size(); ClientIdx++) { + if (!(DeviceId >= Devices[ClientIdx])) + return {ClientIdx, DeviceId}; + DeviceId -= Devices[ClientIdx]; + } + return {-1, -1}; +} + +int32_t RemoteClientManager::initDevice(int32_t DeviceId) { + int32_t ClientIdx, DeviceIdx; + std::tie(ClientIdx, DeviceIdx) = mapDeviceId(DeviceId); + return Clients[ClientIdx].initDevice(DeviceIdx); +} + +int32_t RemoteClientManager::initRequires(int64_t RequiresFlags) { + for (auto &Client : Clients) + Client.initRequires(RequiresFlags); + + return RequiresFlags; +} + +__tgt_target_table *RemoteClientManager::loadBinary(int32_t DeviceId, + __tgt_device_image *Image) { + int32_t ClientIdx, DeviceIdx; + std::tie(ClientIdx, DeviceIdx) = mapDeviceId(DeviceId); + return Clients[ClientIdx].loadBinary(DeviceIdx, Image); +} + +int64_t RemoteClientManager::synchronize(int32_t DeviceId, + __tgt_async_info *AsyncInfoPtr) { + int32_t ClientIdx, DeviceIdx; + 
std::tie(ClientIdx, DeviceIdx) = mapDeviceId(DeviceId); + return Clients[ClientIdx].synchronize(DeviceIdx, AsyncInfoPtr); +} + +int32_t RemoteClientManager::isDataExchangeable(int32_t SrcDevId, + int32_t DstDevId) { + int32_t SrcClientIdx, SrcDeviceIdx, DstClientIdx, DstDeviceIdx; + std::tie(SrcClientIdx, SrcDeviceIdx) = mapDeviceId(SrcDevId); + std::tie(DstClientIdx, DstDeviceIdx) = mapDeviceId(DstDevId); + return Clients[SrcClientIdx].isDataExchangeable(SrcDeviceIdx, DstDeviceIdx); +} + +void *RemoteClientManager::dataAlloc(int32_t DeviceId, int64_t Size, + void *HstPtr) { + int32_t ClientIdx, DeviceIdx; + std::tie(ClientIdx, DeviceIdx) = mapDeviceId(DeviceId); + return Clients[ClientIdx].dataAlloc(DeviceIdx, Size, HstPtr); +} + +int32_t RemoteClientManager::dataDelete(int32_t DeviceId, void *TgtPtr) { + int32_t ClientIdx, DeviceIdx; + std::tie(ClientIdx, DeviceIdx) = mapDeviceId(DeviceId); + return Clients[ClientIdx].dataDelete(DeviceIdx, TgtPtr); +} + +int32_t RemoteClientManager::dataSubmitAsync(int32_t DeviceId, void *TgtPtr, + void *HstPtr, int64_t Size, + __tgt_async_info *AsyncInfoPtr) { + int32_t ClientIdx, DeviceIdx; + std::tie(ClientIdx, DeviceIdx) = mapDeviceId(DeviceId); + return Clients[ClientIdx].dataSubmitAsync(DeviceIdx, TgtPtr, HstPtr, Size, + AsyncInfoPtr); +} + +int32_t RemoteClientManager::dataRetrieveAsync(int32_t DeviceId, void *HstPtr, + void *TgtPtr, int64_t Size, + __tgt_async_info *AsyncInfoPtr) { + int32_t ClientIdx, DeviceIdx; + std::tie(ClientIdx, DeviceIdx) = mapDeviceId(DeviceId); + return Clients[ClientIdx].dataRetrieveAsync(DeviceIdx, HstPtr, TgtPtr, Size, + AsyncInfoPtr); +} + +int32_t RemoteClientManager::dataExchangeAsync(int32_t SrcDevId, void *SrcPtr, + int32_t DstDevId, void *DstPtr, + int64_t Size, + __tgt_async_info *AsyncInfoPtr) { + int32_t SrcClientIdx, SrcDeviceIdx, DstClientIdx, DstDeviceIdx; + std::tie(SrcClientIdx, SrcDeviceIdx) = mapDeviceId(SrcDevId); + std::tie(DstClientIdx, DstDeviceIdx) = mapDeviceId(DstDevId); 
+ return Clients[SrcClientIdx].dataExchangeAsync( + SrcDeviceIdx, SrcPtr, DstDeviceIdx, DstPtr, Size, AsyncInfoPtr); +} + +int32_t RemoteClientManager::runTargetRegionAsync( + int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets, + int32_t ArgNum, __tgt_async_info *AsyncInfoPtr) { + int32_t ClientIdx, DeviceIdx; + std::tie(ClientIdx, DeviceIdx) = mapDeviceId(DeviceId); + return Clients[ClientIdx].runTargetRegionAsync( + DeviceIdx, TgtEntryPtr, TgtArgs, TgtOffsets, ArgNum, AsyncInfoPtr); +} + +int32_t RemoteClientManager::runTargetTeamRegionAsync( + int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets, + int32_t ArgNum, int32_t TeamNum, int32_t ThreadLimit, + uint64_t LoopTripCount, __tgt_async_info *AsyncInfoPtr) { + int32_t ClientIdx, DeviceIdx; + std::tie(ClientIdx, DeviceIdx) = mapDeviceId(DeviceId); + return Clients[ClientIdx].runTargetTeamRegionAsync( + DeviceIdx, TgtEntryPtr, TgtArgs, TgtOffsets, ArgNum, TeamNum, ThreadLimit, + LoopTripCount, AsyncInfoPtr); +} diff --git a/openmp/libomptarget/plugins/remote/src/rtl.cpp b/openmp/libomptarget/plugins/remote/src/rtl.cpp new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/plugins/remote/src/rtl.cpp @@ -0,0 +1,170 @@ +//===--------------------- rtl.cpp - Remote RTL Plugin --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// RTL for Host. 
+// +//===----------------------------------------------------------------------===// + +#include +#include +#include + +#include "Client.h" +#include "Utils.h" +#include "omptarget.h" +#include "omptargetplugin.h" + +#define TARGET_NAME RPC +#define DEBUG_PREFIX "Target " GETNAME(TARGET_NAME) " RTL" + +RemoteClientManager *Manager; + +__attribute__((constructor(101))) void initRPC() { + DP("Init RPC library!\n"); + + RPCConfig Config; + parseEnvironment(Config); + + int Timeout = 5; + if (const char *Env1 = std::getenv("LIBOMPTARGET_RPC_LATENCY")) + Timeout = std::stoi(Env1); + + Manager = new RemoteClientManager(Config.ServerAddresses, Timeout, + Config.MaxSize, Config.BlockSize); +} + +__attribute__((destructor(101))) void deinitRPC() { + Manager->shutdown(); // TODO: Error handle shutting down + DP("Deinit RPC library!\n"); + delete Manager; +} + +// Exposed library API function +#ifdef __cplusplus +extern "C" { +#endif + +int32_t __tgt_rtl_register_lib(__tgt_bin_desc *Desc) { + return Manager->registerLib(Desc); +} + +int32_t __tgt_rtl_unregister_lib(__tgt_bin_desc *Desc) { + return Manager->unregisterLib(Desc); +} + +int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) { + return Manager->isValidBinary(Image); +} + +int32_t __tgt_rtl_number_of_devices() { return Manager->getNumberOfDevices(); } + +int32_t __tgt_rtl_init_device(int32_t DeviceId) { + return Manager->initDevice(DeviceId); +} + +int64_t __tgt_rtl_init_requires(int64_t RequiresFlags) { + return Manager->initRequires(RequiresFlags); +} + +__tgt_target_table *__tgt_rtl_load_binary(int32_t DeviceId, + __tgt_device_image *Image) { + return Manager->loadBinary(DeviceId, (__tgt_device_image *)Image); +} + +int32_t __tgt_rtl_synchronize(int32_t DeviceId, + __tgt_async_info *AsyncInfoPtr) { + return Manager->synchronize(DeviceId, AsyncInfoPtr); +} + +int32_t __tgt_rtl_is_data_exchangable(int32_t SrcDevId, int32_t DstDevId) { + return Manager->isDataExchangeable(SrcDevId, DstDevId); +} + +void 
*__tgt_rtl_data_alloc(int32_t DeviceId, int64_t Size, void *HstPtr) { + return Manager->dataAlloc(DeviceId, Size, HstPtr); +} + +int32_t __tgt_rtl_data_submit(int32_t DeviceId, void *TgtPtr, void *HstPtr, + int64_t Size) { + return Manager->dataSubmitAsync(DeviceId, TgtPtr, HstPtr, Size, nullptr); +} + +int32_t __tgt_rtl_data_submit_async(int32_t DeviceId, void *TgtPtr, + void *HstPtr, int64_t Size, + __tgt_async_info *AsyncInfoPtr) { + return Manager->dataSubmitAsync(DeviceId, TgtPtr, HstPtr, Size, AsyncInfoPtr); +} + +int32_t __tgt_rtl_data_retrieve(int32_t DeviceId, void *HstPtr, void *TgtPtr, + int64_t Size) { + return Manager->dataRetrieveAsync(DeviceId, HstPtr, TgtPtr, Size, nullptr); +} + +int32_t __tgt_rtl_data_retrieve_async(int32_t DeviceId, void *HstPtr, + void *TgtPtr, int64_t Size, + __tgt_async_info *AsyncInfoPtr) { + return Manager->dataRetrieveAsync(DeviceId, HstPtr, TgtPtr, Size, + AsyncInfoPtr); +} + +int32_t __tgt_rtl_data_delete(int32_t DeviceId, void *TgtPtr) { + return Manager->dataDelete(DeviceId, TgtPtr); +} + +int32_t __tgt_rtl_data_exchange(int32_t SrcDevId, void *SrcPtr, + int32_t DstDevId, void *DstPtr, int64_t Size) { + return Manager->dataExchangeAsync(SrcDevId, SrcPtr, DstDevId, DstPtr, Size, + nullptr); +} + +int32_t __tgt_rtl_data_exchange_async(int32_t SrcDevId, void *SrcPtr, + int32_t DstDevId, void *DstPtr, + int64_t Size, + __tgt_async_info *AsyncInfoPtr) { + return Manager->dataExchangeAsync(SrcDevId, SrcPtr, DstDevId, DstPtr, Size, + AsyncInfoPtr); +} + +int32_t __tgt_rtl_run_target_region(int32_t DeviceId, void *TgtEntryPtr, + void **TgtArgs, ptrdiff_t *TgtOffsets, + int32_t ArgNum) { + return Manager->runTargetRegionAsync(DeviceId, TgtEntryPtr, TgtArgs, + TgtOffsets, ArgNum, nullptr); +} + +int32_t __tgt_rtl_run_target_region_async(int32_t DeviceId, void *TgtEntryPtr, + void **TgtArgs, ptrdiff_t *TgtOffsets, + int32_t ArgNum, + __tgt_async_info *AsyncInfoPtr) { + return Manager->runTargetRegionAsync(DeviceId, TgtEntryPtr, 
TgtArgs, + TgtOffsets, ArgNum, AsyncInfoPtr); +} + +int32_t __tgt_rtl_run_target_team_region(int32_t DeviceId, void *TgtEntryPtr, + void **TgtArgs, ptrdiff_t *TgtOffsets, + int32_t ArgNum, int32_t TeamNum, + int32_t ThreadLimit, + uint64_t LoopTripCount) { + return Manager->runTargetTeamRegionAsync(DeviceId, TgtEntryPtr, TgtArgs, + TgtOffsets, ArgNum, TeamNum, + ThreadLimit, LoopTripCount, nullptr); +} + +int32_t __tgt_rtl_run_target_team_region_async( + int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets, + int32_t ArgNum, int32_t TeamNum, int32_t ThreadLimit, + uint64_t LoopTripCount, __tgt_async_info *AsyncInfoPtr) { + return Manager->runTargetTeamRegionAsync( + DeviceId, TgtEntryPtr, TgtArgs, TgtOffsets, ArgNum, TeamNum, ThreadLimit, + LoopTripCount, AsyncInfoPtr); +} + +// Exposed library API function +#ifdef __cplusplus +} +#endif diff --git a/openmp/libomptarget/src/CMakeLists.txt b/openmp/libomptarget/src/CMakeLists.txt --- a/openmp/libomptarget/src/CMakeLists.txt +++ b/openmp/libomptarget/src/CMakeLists.txt @@ -20,6 +20,8 @@ ${CMAKE_CURRENT_SOURCE_DIR}/omptarget.cpp ) +set(LIBOMPTARGET_SRC_FILES ${LIBOMPTARGET_SRC_FILES} PARENT_SCOPE) + include_directories(${LIBOMPTARGET_LLVM_INCLUDE_DIRS}) # Build libomptarget library with libdl dependency. Add LLVMSupport