diff --git a/openmp/libomptarget/DeviceRTL/include/Interface.h b/openmp/libomptarget/DeviceRTL/include/Interface.h --- a/openmp/libomptarget/DeviceRTL/include/Interface.h +++ b/openmp/libomptarget/DeviceRTL/include/Interface.h @@ -132,6 +132,8 @@ int omp_get_initial_device(void); +void *llvm_omp_get_dynamic_shared(); + /// Synchronization /// ///{ diff --git a/openmp/libomptarget/DeviceRTL/src/State.cpp b/openmp/libomptarget/DeviceRTL/src/State.cpp --- a/openmp/libomptarget/DeviceRTL/src/State.cpp +++ b/openmp/libomptarget/DeviceRTL/src/State.cpp @@ -503,9 +503,9 @@ memory::freeShared(Ptr, Bytes, "Frontend free shared"); } -__attribute__((noinline)) void *__kmpc_get_dynamic_shared() { - return memory::getDynamicBuffer(); -} +void *__kmpc_get_dynamic_shared() { return memory::getDynamicBuffer(); } + +void *llvm_omp_get_dynamic_shared() { return __kmpc_get_dynamic_shared(); } /// Allocate storage in shared memory to communicate arguments from the main /// thread to the workers in generic mode. If we exceed diff --git a/openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu b/openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu --- a/openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu @@ -21,6 +21,18 @@ static constexpr unsigned MinBytes = 8; +static constexpr unsigned Alignment = 8; + +/// External symbol to access dynamic shared memory. +extern unsigned char DynamicSharedBuffer[] __attribute__((aligned(Alignment))); +#pragma omp allocate(DynamicSharedBuffer) allocator(omp_pteam_mem_alloc) + +EXTERN void *__kmpc_get_dynamic_shared() { return DynamicSharedBuffer; } + +EXTERN void *llvm_omp_get_dynamic_shared() { + return __kmpc_get_dynamic_shared(); +} + template struct alignas(32) ThreadStackTy { static constexpr unsigned BytesPerThread = BPerThread; diff --git a/openmp/libomptarget/deviceRTLs/interface.h b/openmp/libomptarget/deviceRTLs/interface.h --- a/openmp/libomptarget/deviceRTLs/interface.h +++ b/openmp/libomptarget/deviceRTLs/interface.h @@ -92,6 +92,8 @@ EXTERN int omp_get_initial_device(void); EXTERN int omp_get_max_task_priority(void); +EXTERN void *llvm_omp_get_dynamic_shared(); + //////////////////////////////////////////////////////////////////////////////// // file below is swiped from kmpc host interface //////////////////////////////////////////////////////////////////////////////// @@ -499,4 +501,7 @@ /// paired allocation to make memory management easier. EXTERN void __kmpc_free_shared(void *Ptr, size_t Bytes); +/// Get a pointer to the dynamic shared memory buffer in the device. +EXTERN void *__kmpc_get_dynamic_shared(); + #endif diff --git a/openmp/libomptarget/include/omptarget.h b/openmp/libomptarget/include/omptarget.h --- a/openmp/libomptarget/include/omptarget.h +++ b/openmp/libomptarget/include/omptarget.h @@ -220,6 +220,9 @@ void *llvm_omp_target_alloc_host(size_t size, int device_num); void *llvm_omp_target_alloc_shared(size_t size, int device_num); +/// Dummy target so we have a symbol for generating host fallback. +void *llvm_omp_get_dynamic_shared(); + /// add the clauses of the requires directives in a given file void __tgt_register_requires(int64_t flags); diff --git a/openmp/libomptarget/src/api.cpp b/openmp/libomptarget/src/api.cpp --- a/openmp/libomptarget/src/api.cpp +++ b/openmp/libomptarget/src/api.cpp @@ -53,6 +53,8 @@ return targetAllocExplicit(size, device_num, TARGET_ALLOC_SHARED, __func__); } +EXTERN void *llvm_omp_get_dynamic_shared() { return nullptr; } + EXTERN void omp_target_free(void *device_ptr, int device_num) { TIMESCOPE(); DP("Call to omp_target_free for device %d and address " DPxMOD "\n", diff --git a/openmp/libomptarget/src/exports b/openmp/libomptarget/src/exports --- a/openmp/libomptarget/src/exports +++ b/openmp/libomptarget/src/exports @@ -40,6 +40,7 @@ llvm_omp_target_alloc_host; llvm_omp_target_alloc_shared; llvm_omp_target_alloc_device; + llvm_omp_get_dynamic_shared; __tgt_set_info_flag; __tgt_print_device_info; local: diff --git a/openmp/libomptarget/test/api/omp_dynamic_shared_memory.c b/openmp/libomptarget/test/api/omp_dynamic_shared_memory.c --- a/openmp/libomptarget/test/api/omp_dynamic_shared_memory.c +++ b/openmp/libomptarget/test/api/omp_dynamic_shared_memory.c @@ -1,22 +1,18 @@ // RUN: %libomptarget-compile-nvptx64-nvidia-cuda -fopenmp-target-new-runtime -// RUN: env LIBOMPTARGET_SHARED_MEMORY_SIZE=4 \ +// RUN: env LIBOMPTARGET_SHARED_MEMORY_SIZE=256 \ // RUN: %libomptarget-run-nvptx64-nvidia-cuda | %fcheck-nvptx64-nvidia-cuda // REQUIRES: nvptx64-nvidia-cuda #include #include -void *get_dynamic_shared() { return NULL; } -#pragma omp begin declare variant match(device = {arch(nvptx64)}) -extern void *__kmpc_get_dynamic_shared(); -void *get_dynamic_shared() { return __kmpc_get_dynamic_shared(); } -#pragma omp end declare variant +void *llvm_omp_get_dynamic_shared(); int main() { int x; #pragma omp target parallel map(from : x) { - int *buf = get_dynamic_shared(); + int *buf = llvm_omp_get_dynamic_shared() + 252; #pragma omp barrier if (omp_get_thread_num() == 0) *buf = 1; @@ -26,6 +22,6 @@ } // CHECK: PASS - if (x == 1) + if (x == 1 && llvm_omp_get_dynamic_shared() == NULL) printf("PASS\n"); }