Index: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp =================================================================== --- lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -614,6 +614,11 @@ WorkerFunctionState &WST) { CGBuilderTy &Bld = CGF.Builder; + // For data sharing, initialize the stack for master and workers. + CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction( + OMPRTL_NVPTX__kmpc_data_sharing_init_stack)); + llvm::BasicBlock *WorkerBB = CGF.createBasicBlock(".worker"); llvm::BasicBlock *MasterCheckBB = CGF.createBasicBlock(".mastercheck"); llvm::BasicBlock *MasterBB = CGF.createBasicBlock(".master"); @@ -642,11 +647,6 @@ CGF.EmitRuntimeCall( createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_init), Args); - // For data sharing, we need to initialize the stack. - CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_data_sharing_init_stack)); - emitGenericVarsProlog(CGF, WST.Loc); } Index: test/OpenMP/nvptx_data_sharing.cpp =================================================================== --- test/OpenMP/nvptx_data_sharing.cpp +++ test/OpenMP/nvptx_data_sharing.cpp @@ -32,8 +32,9 @@ // CK1: {{.*}}define void @__omp_offloading{{.*}}test_ds{{.*}}() // CK1: [[SHAREDARGS1:%.+]] = alloca i8** // CK1: [[SHAREDARGS2:%.+]] = alloca i8** -// CK1: call void @__kmpc_kernel_init // CK1: call void @__kmpc_data_sharing_init_stack +// CK1: call void @__omp_offloading{{.*}}_worker() +// CK1: call void @__kmpc_kernel_init // CK1: [[GLOBALSTACK:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i64 8, i16 0) // CK1: [[GLOBALSTACK2:%.+]] = bitcast i8* [[GLOBALSTACK]] to %struct._globalized_locals_ty* // CK1: [[A:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[GLOBALSTACK2]], i32 0, i32 0 Index: test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp =================================================================== --- test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp +++ test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp @@ -25,8 +25,8 @@ // CHECK: define void @__omp_offloading_{{.*}}_main_l[[@LINE-10]](i{{64|32}} %{{[^,].*}}, i32* dereferenceable{{[^,]*}}, i{{64|32}} %{{[^,)]*}}) // CHECK: [[TID:%.+]] = call i32 @__kmpc_global_thread_num(%ident_t* @ -// CHECK: call void @__kmpc_kernel_init( // CHECK: call void @__kmpc_data_sharing_init_stack() +// CHECK: call void @__kmpc_kernel_init( // CHECK: call void @__kmpc_for_static_init_4( // CHECK: call void @__kmpc_kernel_prepare_parallel( // CHECK: call void @__kmpc_begin_sharing_variables(i8*** [[BUF_PTR_PTR:%[^,]+]], i{{64|32}} 4)