Index: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp =================================================================== --- lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -33,10 +33,11 @@ /// \brief Call to void __kmpc_spmd_kernel_deinit(); OMPRTL_NVPTX__kmpc_spmd_kernel_deinit, /// \brief Call to void __kmpc_kernel_prepare_parallel(void - /// *outlined_function, void ***args, kmp_int32 nArgs); + /// *outlined_function, void ***args, kmp_int32 nArgs, int16_t + /// IsOMPRuntimeInitialized); OMPRTL_NVPTX__kmpc_kernel_prepare_parallel, /// \brief Call to bool __kmpc_kernel_parallel(void **outlined_function, void - /// ***args); + /// ***args, int16_t IsOMPRuntimeInitialized); OMPRTL_NVPTX__kmpc_kernel_parallel, /// \brief Call to void __kmpc_kernel_end_parallel(); OMPRTL_NVPTX__kmpc_kernel_end_parallel, @@ -521,7 +522,9 @@ // Set up shared arguments Address SharedArgs = CGF.CreateDefaultAlignTempAlloca(CGF.Int8PtrPtrTy, "shared_args"); - llvm::Value *Args[] = {WorkFn.getPointer(), SharedArgs.getPointer()}; + // TODO: Optimize runtime initialization and pass in correct value. + llvm::Value *Args[] = {WorkFn.getPointer(), SharedArgs.getPointer(), + /*RequiresOMPRuntime=*/Bld.getInt16(1)}; llvm::Value *Ret = CGF.EmitRuntimeCall( createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_parallel), Args); Bld.CreateStore(Bld.CreateZExt(Ret, CGF.Int8Ty), ExecStatus); @@ -637,18 +640,21 @@ } case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: { /// Build void __kmpc_kernel_prepare_parallel( - /// void *outlined_function, void ***args, kmp_int32 nArgs); + /// void *outlined_function, void ***args, kmp_int32 nArgs, int16_t + /// IsOMPRuntimeInitialized); llvm::Type *TypeParams[] = {CGM.Int8PtrTy, - CGM.Int8PtrPtrTy->getPointerTo(0), CGM.Int32Ty}; + CGM.Int8PtrPtrTy->getPointerTo(0), CGM.Int32Ty, + CGM.Int16Ty}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_prepare_parallel"); break; } case OMPRTL_NVPTX__kmpc_kernel_parallel: { - /// Build bool __kmpc_kernel_parallel(void **outlined_function, void ***args); + /// Build bool __kmpc_kernel_parallel(void **outlined_function, void + /// ***args, int16_t IsOMPRuntimeInitialized); llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy, - CGM.Int8PtrPtrTy->getPointerTo(0)}; + CGM.Int8PtrPtrTy->getPointerTo(0), CGM.Int16Ty}; llvm::Type *RetTy = CGM.getTypes().ConvertType(CGM.getContext().BoolTy); llvm::FunctionType *FnTy = llvm::FunctionType::get(RetTy, TypeParams, /*isVarArg*/ false); @@ -947,8 +953,10 @@ CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrPtrTy, "shared_args"); llvm::Value *SharedArgsPtr = SharedArgs.getPointer(); + // TODO: Optimize runtime initialization and pass in correct value. llvm::Value *Args[] = {ID, SharedArgsPtr, - Bld.getInt32(CapturedVars.size())}; + Bld.getInt32(CapturedVars.size()), + /*RequiresOMPRuntime=*/Bld.getInt16(1)}; CGF.EmitRuntimeCall( createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel), @@ -968,9 +976,10 @@ Idx++; } } else { - llvm::Value *Args[] = {ID, - llvm::ConstantPointerNull::get(CGF.VoidPtrPtrTy->getPointerTo(0)), - /*nArgs=*/Bld.getInt32(0)}; + // TODO: Optimize runtime initialization and pass in correct value. + llvm::Value *Args[] = { + ID, llvm::ConstantPointerNull::get(CGF.VoidPtrPtrTy->getPointerTo(0)), + /*nArgs=*/Bld.getInt32(0), /*RequiresOMPRuntime=*/Bld.getInt16(1)}; CGF.EmitRuntimeCall( createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel), Args); Index: test/OpenMP/nvptx_data_sharing.cpp =================================================================== --- test/OpenMP/nvptx_data_sharing.cpp +++ test/OpenMP/nvptx_data_sharing.cpp @@ -24,7 +24,7 @@ // CK1: define internal void @__omp_offloading_{{.*}}test_ds{{.*}}worker(){{.*}}{ // CK1: [[SHAREDARGS:%.+]] = alloca i8** -// CK1: call i1 @__kmpc_kernel_parallel(i8** %work_fn, i8*** [[SHAREDARGS]]) +// CK1: call i1 @__kmpc_kernel_parallel(i8** %work_fn, i8*** [[SHAREDARGS]], i16 1) // CK1: [[SHARGSTMP:%.+]] = load i8**, i8*** [[SHAREDARGS]] // CK1: call void @__omp_outlined___wrapper{{.*}}({{.*}}, i8** [[SHARGSTMP]]) @@ -32,7 +32,7 @@ // CK1: {{.*}}define void @__omp_offloading{{.*}}test_ds{{.*}}() // CK1: [[SHAREDARGS1:%.+]] = alloca i8** -// CK1: call void @__kmpc_kernel_prepare_parallel({{.*}}, i8*** [[SHAREDARGS1]], i32 1) +// CK1: call void @__kmpc_kernel_prepare_parallel({{.*}}, i8*** [[SHAREDARGS1]], i32 1, i16 1) // CK1: [[SHARGSTMP1:%.+]] = load i8**, i8*** [[SHAREDARGS1]] // CK1: [[SHARGSTMP2:%.+]] = getelementptr inbounds i8*, i8** [[SHARGSTMP1]] // CK1: [[SHAREDVAR:%.+]] = bitcast i32* {{.*}} to i8* Index: test/OpenMP/nvptx_target_teams_codegen.cpp =================================================================== --- test/OpenMP/nvptx_target_teams_codegen.cpp +++ test/OpenMP/nvptx_target_teams_codegen.cpp @@ -60,7 +60,7 @@ // // CHECK: [[AWAIT_WORK]] // CHECK: call void @llvm.nvvm.barrier0() - // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]], i8*** %shared_args) + // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]], i8*** %shared_args, i16 1) // CHECK: [[KPRB:%.+]] = zext i1 [[KPR]] to i8 // store i8 [[KPRB]], i8* [[OMP_EXEC_STATUS]], align 1 // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]], @@ -148,7 +148,7 @@ // // CHECK: [[AWAIT_WORK]] // CHECK: call void @llvm.nvvm.barrier0() - // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]], i8*** %shared_args) + // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]], i8*** %shared_args, i16 1) // CHECK: [[KPRB:%.+]] = zext i1 [[KPR]] to i8 // store i8 [[KPRB]], i8* [[OMP_EXEC_STATUS]], align 1 // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]],