diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -1182,7 +1182,11 @@
     packet->grid_size_y = 1;
     packet->grid_size_z = 1;
     packet->private_segment_size = KernelInfoEntry.private_segment_size;
     packet->group_segment_size = KernelInfoEntry.group_segment_size;
+    // The dispatch has to reserve the kernel's own group memory plus whatever
+    // is requested dynamically, so the override is added, not substituted.
+    if (const char *EnvStr = getenv("LIBOMPTARGET_SHARED_MEMORY_SIZE"))
+      packet->group_segment_size += std::stoi(EnvStr);
     packet->kernel_object = KernelInfoEntry.kernel_object;
     packet->kernarg_address = 0; // use the block allocator
     packet->reserved2 = 0; // impl writes id_ here
@@ -1541,6 +1545,9 @@
       if (char *envStr = getenv("LIBOMPTARGET_DEVICE_RTL_DEBUG")) {
         host_device_env.DebugKind = std::stoi(envStr);
       }
+      if (const char *EnvStr = getenv("LIBOMPTARGET_SHARED_MEMORY_SIZE")) {
+        host_device_env.DynamicMemSize = std::stoi(EnvStr);
+      }
 
       int rc = get_symbol_info_without_loading((char *)image->ImageStart,
                                                img_size, sym(), &si);
diff --git a/openmp/libomptarget/test/api/omp_dynamic_shared_memory_amdgpu.c b/openmp/libomptarget/test/api/omp_dynamic_shared_memory_amdgpu.c
new file mode 100644
--- /dev/null
+++ b/openmp/libomptarget/test/api/omp_dynamic_shared_memory_amdgpu.c
@@ -0,0 +1,29 @@
+// RUN: %libomptarget-compile-amdgcn-amd-amdhsa -O1 -mllvm -openmp-opt-inline-device
+// RUN: env LIBOMPTARGET_SHARED_MEMORY_SIZE=256 \
+// RUN:   %libomptarget-run-amdgcn-amd-amdhsa | %fcheck-amdgcn-amd-amdhsa
+// REQUIRES: amdgcn-amd-amdhsa
+
+#include <omp.h>
+#include <stdio.h>
+
+// Thread 0 stores into the last int-sized slot of the 256-byte dynamic shared
+// buffer requested through the environment, and thread 1 reads it back.
+int main() {
+  int x;
+#pragma omp target parallel map(from : x)
+  {
+    // 252 is the byte offset of the final 4-byte slot in a 256-byte buffer.
+    int *buf = llvm_omp_target_dynamic_shared_alloc() + 252;
+#pragma omp barrier
+    if (omp_get_thread_num() == 0)
+      *buf = 1;
+#pragma omp barrier
+    if (omp_get_thread_num() == 1)
+      x = *buf;
+  }
+
+  // On the host the allocator call is expected to yield NULL.
+  // CHECK: PASS
+  if (x == 1 && llvm_omp_target_dynamic_shared_alloc() == NULL)
+    printf("PASS\n");
+}