Index: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp =================================================================== --- lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -81,6 +81,8 @@ OMPRTL_NVPTX__kmpc_end_reduce_nowait, /// Call to void __kmpc_data_sharing_init_stack(); OMPRTL_NVPTX__kmpc_data_sharing_init_stack, + /// Call to void __kmpc_data_sharing_init_stack_spmd(); + OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd, /// Call to void* __kmpc_data_sharing_push_stack(size_t size, /// int16_t UseSharedMemory); OMPRTL_NVPTX__kmpc_data_sharing_push_stack, @@ -1025,6 +1027,12 @@ /*RequiresDataSharing=*/Bld.getInt16(1)}; CGF.EmitRuntimeCall( createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_init), Args); + + // For data sharing, we need to initialize the stack. + CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction( + OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd)); + CGF.EmitBranch(ExecuteBB); CGF.EmitBlock(ExecuteBB); @@ -1107,11 +1115,6 @@ // Wait for parallel work syncCTAThreads(CGF); - // For data sharing, we need to initialize the stack for workers. - CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_data_sharing_init_stack)); - Address WorkFn = CGF.CreateDefaultAlignTempAlloca(CGF.Int8PtrTy, /*Name=*/"work_fn"); Address ExecStatus = @@ -1417,6 +1420,13 @@ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack"); break; } + case OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd: { + /// Build void __kmpc_data_sharing_init_stack_spmd(); + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack_spmd"); + break; + } case OMPRTL_NVPTX__kmpc_data_sharing_push_stack: { // Build void *__kmpc_data_sharing_push_stack(size_t size, // int16_t UseSharedMemory); Index: test/OpenMP/nvptx_data_sharing.cpp =================================================================== --- test/OpenMP/nvptx_data_sharing.cpp +++ test/OpenMP/nvptx_data_sharing.cpp @@ -30,7 +30,6 @@ /// ========= In the worker function ========= /// // CK1: {{.*}}define internal void @__omp_offloading{{.*}}test_ds{{.*}}_worker() // CK1: call void @llvm.nvvm.barrier0() -// CK1: call void @__kmpc_data_sharing_init_stack /// ========= In the kernel function ========= /// Index: test/OpenMP/nvptx_data_sharing_spmd.cpp =================================================================== --- /dev/null +++ test/OpenMP/nvptx_data_sharing_spmd.cpp @@ -0,0 +1,24 @@ +// Test device global memory data sharing initialization codegen for spmd. +///==========================================================================/// + +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CK1 + +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +void test_ds(){ + #pragma omp target teams distribute parallel for + for (int i = 0; i < 100; i++ ) { + int a = 100; + } +} + +// CK1: {{.*}}define weak void @__omp_offloading{{.*}}test_ds{{.*}}() +// CK1: call void @__kmpc_spmd_kernel_init +// CK1-NEXT: call void @__kmpc_data_sharing_init_stack_spmd + +#endif +