diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt --- a/libc/config/gpu/entrypoints.txt +++ b/libc/config/gpu/entrypoints.txt @@ -97,6 +97,7 @@ # gpu/rpc.h entrypoints libc.src.gpu.rpc_reset + libc.src.gpu.rpc_host_call ) set(TARGET_LIBM_ENTRYPOINTS diff --git a/libc/include/llvm-libc-types/rpc_opcodes_t.h b/libc/include/llvm-libc-types/rpc_opcodes_t.h --- a/libc/include/llvm-libc-types/rpc_opcodes_t.h +++ b/libc/include/llvm-libc-types/rpc_opcodes_t.h @@ -19,6 +19,7 @@ RPC_CLOSE_FILE = 6, RPC_MALLOC = 7, RPC_FREE = 8, + RPC_HOST_CALL = 9, // TODO: Move these out of here and handle then with custom handlers in the // loader. RPC_TEST_INCREMENT = 1000, diff --git a/libc/spec/gpu_ext.td b/libc/spec/gpu_ext.td --- a/libc/spec/gpu_ext.td +++ b/libc/spec/gpu_ext.td @@ -10,6 +10,11 @@ RetValSpec, [ArgSpec, ArgSpec] >, + FunctionSpec< + "rpc_host_call", + RetValSpec, + [ArgSpec, ArgSpec, ArgSpec] + >, ] >; let Headers = [ diff --git a/libc/src/gpu/CMakeLists.txt b/libc/src/gpu/CMakeLists.txt --- a/libc/src/gpu/CMakeLists.txt +++ b/libc/src/gpu/CMakeLists.txt @@ -8,3 +8,14 @@ libc.src.__support.RPC.rpc_client libc.src.__support.GPU.utils ) + +add_entrypoint_object( + rpc_host_call + SRCS + rpc_host_call.cpp + HDRS + rpc_host_call.h + DEPENDS + libc.src.__support.RPC.rpc_client + libc.src.__support.GPU.utils +) diff --git a/libc/src/gpu/rpc_host_call.h b/libc/src/gpu/rpc_host_call.h new file mode 100644 --- /dev/null +++ b/libc/src/gpu/rpc_host_call.h @@ -0,0 +1,20 @@ +//===-- Implementation header for RPC functions -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_GPU_RPC_HOST_CALL_H +#define LLVM_LIBC_SRC_GPU_RPC_HOST_CALL_H + +#include // size_t + +namespace __llvm_libc { + +void rpc_host_call(void *fn, void *buffer, size_t size); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_GPU_RPC_H_HOST_CALL diff --git a/libc/src/gpu/rpc_host_call.cpp b/libc/src/gpu/rpc_host_call.cpp new file mode 100644 --- /dev/null +++ b/libc/src/gpu/rpc_host_call.cpp @@ -0,0 +1,30 @@ +//===---------- GPU implementation of the external RPC call function ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/gpu/rpc_host_call.h" + +#include "llvm-libc-types/rpc_opcodes_t.h" +#include "src/__support/GPU/utils.h" +#include "src/__support/RPC/rpc_client.h" +#include "src/__support/common.h" + +namespace __llvm_libc { + +// This calls the associated function pointer on the RPC server with the given +// arguments. We expect that the pointer here is a valid pointer on the server. +LLVM_LIBC_FUNCTION(void, rpc_host_call, (void *fn, void *data, size_t size)) { + rpc::Client::Port port = rpc::client.open(); + port.send_n(data, size); + port.send([=](rpc::Buffer *buffer) { + buffer->data[0] = reinterpret_cast(fn); + }); + port.recv([](rpc::Buffer *) {}); + port.close(); +} + +} // namespace __llvm_libc diff --git a/libc/utils/gpu/server/rpc_server.cpp b/libc/utils/gpu/server/rpc_server.cpp --- a/libc/utils/gpu/server/rpc_server.cpp +++ b/libc/utils/gpu/server/rpc_server.cpp @@ -129,6 +129,18 @@ }); break; } + case RPC_HOST_CALL: { + uint64_t sizes[rpc::MAX_LANE_SIZE] = {0}; + void *args[rpc::MAX_LANE_SIZE] = {nullptr}; + port->recv_n(args, sizes, [&](uint64_t size) { return new char[size]; }); + port->recv([&](rpc::Buffer *buffer, uint32_t id) { + reinterpret_cast(buffer->data[0])(args[id]); + }); + port->send([&](rpc::Buffer *, uint32_t id) { + delete[] reinterpret_cast(args[id]); + }); + break; + } // TODO: Move handling of these test cases to the loader implementation. case RPC_TEST_INCREMENT: { port->recv_and_send([](rpc::Buffer *buffer) { @@ -341,7 +353,7 @@ using ServerPort = std::variant::Port *, rpc::Server<32>::Port *, rpc::Server<64>::Port *>; -ServerPort getPort(rpc_port_t ref) { +ServerPort get_port(rpc_port_t ref) { if (ref.lane_size == 1) return reinterpret_cast::Port *>(ref.handle); else if (ref.lane_size == 32) @@ -353,7 +365,7 @@ } void rpc_send(rpc_port_t ref, rpc_port_callback_ty callback, void *data) { - auto port = getPort(ref); + auto port = get_port(ref); std::visit( [=](auto &port) { port->send([=](rpc::Buffer *buffer) { @@ -364,7 +376,7 @@ } void rpc_recv(rpc_port_t ref, rpc_port_callback_ty callback, void *data) { - auto port = getPort(ref); + auto port = get_port(ref); std::visit( [=](auto &port) { port->recv([=](rpc::Buffer *buffer) { @@ -376,7 +388,7 @@ void rpc_recv_and_send(rpc_port_t ref, rpc_port_callback_ty callback, void *data) { - auto port = getPort(ref); + auto port = get_port(ref); std::visit( [=](auto &port) { port->recv_and_send([=](rpc::Buffer *buffer) { diff --git a/openmp/libomptarget/test/libc/host_call.c b/openmp/libomptarget/test/libc/host_call.c new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/libc/host_call.c @@ -0,0 +1,54 @@ +// RUN: %libomptarget-compile-run-and-check-generic + +// REQUIRES: libc + +#include +#include +#include + +#pragma omp begin declare variant match(device = {kind(gpu)}) +// Extension provided by the 'libc' project. +void rpc_host_call(void *fn, void *args, size_t size); +#pragma omp declare target to(rpc_host_call) device_type(nohost) +#pragma omp end declare variant + +#pragma omp begin declare variant match(device = {kind(cpu)}) +// Dummy host implementation to make this work for all targets. +void rpc_host_call(void *fn, void *args, size_t size) { + ((void (*)(void *))fn)(args); +} +#pragma omp end declare variant + +typedef struct args_s { + int thread_id; + int block_id; +} args_t; + +// CHECK-DAG: Thread: 0, Block: 0 +// CHECK-DAG: Thread: 1, Block: 0 +// CHECK-DAG: Thread: 0, Block: 1 +// CHECK-DAG: Thread: 1, Block: 1 +// CHECK-DAG: Thread: 0, Block: 2 +// CHECK-DAG: Thread: 1, Block: 2 +// CHECK-DAG: Thread: 0, Block: 3 +// CHECK-DAG: Thread: 1, Block: 3 +void foo(void *data) { + assert(omp_is_initial_device() && "Not executing on host?"); + args_t *args = (args_t *)data; + printf("Thread: %d, Block: %d\n", args->thread_id, args->block_id); +} + +void *fn_ptr = NULL; +#pragma omp declare target to(fn_ptr) + +int main() { + fn_ptr = (void *)&foo; +#pragma omp target update to(fn_ptr) + +#pragma omp target teams num_teams(4) +#pragma omp parallel num_threads(2) + { + args_t args = {omp_get_thread_num(), omp_get_team_num()}; + rpc_host_call(fn_ptr, &args, sizeof(args_t)); + } +}