Index: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
===================================================================
--- lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -598,6 +598,11 @@
                                                   WorkerFunctionState &WST) {
   CGBuilderTy &Bld = CGF.Builder;
 
+  // For data sharing, initialize the stack for master and workers.
+  CGF.EmitRuntimeCall(
+      createNVPTXRuntimeFunction(
+          OMPRTL_NVPTX__kmpc_data_sharing_init_stack));
+
   llvm::BasicBlock *WorkerBB = CGF.createBasicBlock(".worker");
   llvm::BasicBlock *MasterCheckBB = CGF.createBasicBlock(".mastercheck");
   llvm::BasicBlock *MasterBB = CGF.createBasicBlock(".master");
@@ -626,11 +631,6 @@
   CGF.EmitRuntimeCall(
       createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_init), Args);
 
-  // For data sharing, we need to initialize the stack.
-  CGF.EmitRuntimeCall(
-      createNVPTXRuntimeFunction(
-          OMPRTL_NVPTX__kmpc_data_sharing_init_stack));
-
   emitGenericVarsProlog(CGF, WST.Loc);
 }
 
Index: test/OpenMP/nvptx_data_sharing.cpp
===================================================================
--- test/OpenMP/nvptx_data_sharing.cpp
+++ test/OpenMP/nvptx_data_sharing.cpp
@@ -30,8 +30,9 @@
 // CK1: {{.*}}define void @__omp_offloading{{.*}}test_ds{{.*}}()
 // CK1: [[SHAREDARGS1:%.+]] = alloca i8**
 // CK1: [[SHAREDARGS2:%.+]] = alloca i8**
-// CK1: call void @__kmpc_kernel_init
 // CK1: call void @__kmpc_data_sharing_init_stack
+// CK1: call void @__omp_offloading{{.*}}_worker()
+// CK1: call void @__kmpc_kernel_init
 // CK1: [[GLOBALSTACK:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i64 8, i16 0)
 // CK1: [[GLOBALSTACK2:%.+]] = bitcast i8* [[GLOBALSTACK]] to %struct._globalized_locals_ty*
 // CK1: [[A:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[GLOBALSTACK2]], i32 0, i32 0