Index: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp =================================================================== --- lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -801,6 +801,11 @@ // Wait for parallel work syncCTAThreads(CGF); + // For data sharing, we need to initialize the stack for workers. + CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction( + OMPRTL_NVPTX__kmpc_data_sharing_init_stack)); + Address WorkFn = CGF.CreateDefaultAlignTempAlloca(CGF.Int8PtrTy, /*Name=*/"work_fn"); Address ExecStatus = Index: test/OpenMP/nvptx_data_sharing.cpp =================================================================== --- test/OpenMP/nvptx_data_sharing.cpp +++ test/OpenMP/nvptx_data_sharing.cpp @@ -27,6 +27,11 @@ } } +/// ========= In the worker function ========= /// +// CK1: {{.*}}define internal void @__omp_offloading{{.*}}test_ds{{.*}}_worker() +// CK1: call void @llvm.nvvm.barrier0() +// CK1: call void @__kmpc_data_sharing_init_stack + /// ========= In the kernel function ========= /// // CK1: {{.*}}define void @__omp_offloading{{.*}}test_ds{{.*}}()