diff --git a/openmp/docs/design/Runtimes.rst b/openmp/docs/design/Runtimes.rst --- a/openmp/docs/design/Runtimes.rst +++ b/openmp/docs/design/Runtimes.rst @@ -1006,9 +1006,9 @@ """"""""""""""""""""""""""""""" This environment variable sets the amount of dynamic shared memory in bytes used -by the kernel once it is launched. A pointer to the dynamic memory buffer can -currently only be accessed using the ``__kmpc_get_dynamic_shared`` device -runtime call. +by the kernel once it is launched. A pointer to the dynamic memory buffer can be +accessed using the ``llvm_omp_target_dynamic_shared_alloc`` function. An example +is shown in :ref:`libomptarget_dynamic_shared`. .. toctree:: :hidden: @@ -1104,6 +1104,40 @@ runtime functions on the target device. It is linked with the device code's LLVM IR during compilation. +.. _libomptarget_dynamic_shared: + +Dynamic Shared Memory +^^^^^^^^^^^^^^^^^^^^^ + +The target device runtime contains a pointer to the dynamic shared memory +buffer. This pointer can be obtained using the +``llvm_omp_target_dynamic_shared_alloc`` extension. If this function is called +from the host it will simply return a null pointer. In order to use this buffer +the kernel must be launched with an adequate amount of dynamic shared memory +allocated. Currently this is done using the ``LIBOMPTARGET_SHARED_MEMORY_SIZE`` +environment variable. An example is given below. + +.. code-block:: c++ + + void foo() { + int x; + #pragma omp target parallel map(from : x) + { + int *buf = llvm_omp_target_dynamic_shared_alloc(); + #pragma omp barrier + if (omp_get_thread_num() == 0) + *buf = 1; + #pragma omp barrier + if (omp_get_thread_num() == 1) + x = *buf; + } + } + +.. code-block:: console + + $ clang++ -fopenmp -fopenmp-targets=nvptx64 shared.c + $ env LIBOMPTARGET_SHARED_MEMORY_SIZE=256 ./shared + .. _libomptarget_device_debugging: Debugging diff --git a/openmp/libomptarget/DeviceRTL/include/Interface.h b/openmp/libomptarget/DeviceRTL/include/Interface.h --- a/openmp/libomptarget/DeviceRTL/include/Interface.h +++ b/openmp/libomptarget/DeviceRTL/include/Interface.h @@ -132,7 +132,7 @@ int omp_get_initial_device(void); -void *llvm_omp_get_dynamic_shared(); +void *llvm_omp_target_dynamic_shared_alloc(); /// Synchronization /// diff --git a/openmp/libomptarget/DeviceRTL/src/State.cpp b/openmp/libomptarget/DeviceRTL/src/State.cpp --- a/openmp/libomptarget/DeviceRTL/src/State.cpp +++ b/openmp/libomptarget/DeviceRTL/src/State.cpp @@ -530,6 +530,10 @@ void *__kmpc_get_dynamic_shared() { return memory::getDynamicBuffer(); } +void *llvm_omp_target_dynamic_shared_alloc() { + return __kmpc_get_dynamic_shared(); +} + void *llvm_omp_get_dynamic_shared() { return __kmpc_get_dynamic_shared(); } /// Allocate storage in shared memory to communicate arguments from the main diff --git a/openmp/libomptarget/include/omptarget.h b/openmp/libomptarget/include/omptarget.h --- a/openmp/libomptarget/include/omptarget.h +++ b/openmp/libomptarget/include/omptarget.h @@ -226,7 +226,7 @@ void *llvm_omp_target_alloc_shared(size_t size, int device_num); /// Dummy target so we have a symbol for generating host fallback. -void *llvm_omp_get_dynamic_shared(); +void *llvm_omp_target_dynamic_shared_alloc(); /// add the clauses of the requires directives in a given file void __tgt_register_requires(int64_t flags); diff --git a/openmp/libomptarget/src/api.cpp b/openmp/libomptarget/src/api.cpp --- a/openmp/libomptarget/src/api.cpp +++ b/openmp/libomptarget/src/api.cpp @@ -53,6 +53,7 @@ return targetAllocExplicit(size, device_num, TARGET_ALLOC_SHARED, __func__); } +EXTERN void *llvm_omp_target_dynamic_shared_alloc() { return nullptr; } EXTERN void *llvm_omp_get_dynamic_shared() { return nullptr; } EXTERN void omp_target_free(void *device_ptr, int device_num) { diff --git a/openmp/libomptarget/src/exports b/openmp/libomptarget/src/exports --- a/openmp/libomptarget/src/exports +++ b/openmp/libomptarget/src/exports @@ -40,7 +40,7 @@ llvm_omp_target_alloc_host; llvm_omp_target_alloc_shared; llvm_omp_target_alloc_device; - llvm_omp_get_dynamic_shared; + llvm_omp_target_dynamic_shared_alloc; __tgt_set_info_flag; __tgt_print_device_info; omp_get_interop_ptr; diff --git a/openmp/libomptarget/test/api/omp_dynamic_shared_memory.c b/openmp/libomptarget/test/api/omp_dynamic_shared_memory.c --- a/openmp/libomptarget/test/api/omp_dynamic_shared_memory.c +++ b/openmp/libomptarget/test/api/omp_dynamic_shared_memory.c @@ -6,13 +6,11 @@ #include #include -void *llvm_omp_get_dynamic_shared(); - int main() { int x; #pragma omp target parallel map(from : x) { - int *buf = llvm_omp_get_dynamic_shared() + 252; + int *buf = llvm_omp_target_dynamic_shared_alloc() + 252; #pragma omp barrier if (omp_get_thread_num() == 0) *buf = 1; @@ -22,6 +20,6 @@ } // CHECK: PASS - if (x == 1 && llvm_omp_get_dynamic_shared() == NULL) + if (x == 1 && llvm_omp_target_dynamic_shared_alloc() == NULL) printf("PASS\n"); } diff --git a/openmp/runtime/src/include/omp.h.var b/openmp/runtime/src/include/omp.h.var --- a/openmp/runtime/src/include/omp.h.var +++ b/openmp/runtime/src/include/omp.h.var @@ -496,6 +496,9 @@ /* OpenMP 5.2 */ extern int __KAI_KMPC_CONVENTION omp_in_explicit_task(void); + /* LLVM Extensions */ + extern void *llvm_omp_target_dynamic_shared_alloc(); + # undef __KAI_KMPC_CONVENTION # undef __KMP_IMP