diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -38,11 +38,9 @@
   /// Call to void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime);
   OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2,
   /// Call to void __kmpc_kernel_prepare_parallel(void
-  /// *outlined_function, int16_t
-  /// IsOMPRuntimeInitialized);
+  /// *outlined_function);
   OMPRTL_NVPTX__kmpc_kernel_prepare_parallel,
-  /// Call to bool __kmpc_kernel_parallel(void **outlined_function,
-  /// int16_t IsOMPRuntimeInitialized);
+  /// Call to bool __kmpc_kernel_parallel(void **outlined_function);
   OMPRTL_NVPTX__kmpc_kernel_parallel,
   /// Call to void __kmpc_kernel_end_parallel();
   OMPRTL_NVPTX__kmpc_kernel_end_parallel,
@@ -1466,8 +1464,7 @@
   CGF.InitTempAlloca(WorkFn, llvm::Constant::getNullValue(CGF.Int8PtrTy));
 
   // TODO: Optimize runtime initialization and pass in correct value.
-  llvm::Value *Args[] = {WorkFn.getPointer(),
-                         /*RequiresOMPRuntime=*/Bld.getInt16(1)};
+  llvm::Value *Args[] = {WorkFn.getPointer()};
   llvm::Value *Ret = CGF.EmitRuntimeCall(
       createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_parallel), Args);
   Bld.CreateStore(Bld.CreateZExt(Ret, CGF.Int8Ty), ExecStatus);
@@ -1595,17 +1592,16 @@
   }
   case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: {
     /// Build void __kmpc_kernel_prepare_parallel(
-    /// void *outlined_function, int16_t IsOMPRuntimeInitialized);
-    llvm::Type *TypeParams[] = {CGM.Int8PtrTy, CGM.Int16Ty};
+    /// void *outlined_function);
+    llvm::Type *TypeParams[] = {CGM.Int8PtrTy};
     auto *FnTy =
         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_prepare_parallel");
     break;
   }
   case OMPRTL_NVPTX__kmpc_kernel_parallel: {
-    /// Build bool __kmpc_kernel_parallel(void **outlined_function,
-    /// int16_t IsOMPRuntimeInitialized);
-    llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy, CGM.Int16Ty};
+    /// Build bool __kmpc_kernel_parallel(void **outlined_function);
+    llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy};
     llvm::Type *RetTy = CGM.getTypes().ConvertType(CGM.getContext().BoolTy);
     auto *FnTy =
         llvm::FunctionType::get(RetTy, TypeParams, /*isVarArg*/ false);
@@ -2569,7 +2565,7 @@
     llvm::Value *ID = Bld.CreateBitOrPointerCast(WFn, CGM.Int8PtrTy);
 
     // Prepare for parallel region. Indicate the outlined function.
-    llvm::Value *Args[] = {ID, /*RequiresOMPRuntime=*/Bld.getInt16(1)};
+    llvm::Value *Args[] = {ID};
     CGF.EmitRuntimeCall(
         createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel),
         Args);
diff --git a/clang/test/OpenMP/nvptx_data_sharing.cpp b/clang/test/OpenMP/nvptx_data_sharing.cpp
--- a/clang/test/OpenMP/nvptx_data_sharing.cpp
+++ b/clang/test/OpenMP/nvptx_data_sharing.cpp
@@ -55,7 +55,7 @@
 // CK1: [[A:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[GLOBALSTACK2]], i32 0, i32 0
 // CK1: [[B:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[GLOBALSTACK2]], i32 0, i32 1
 // CK1: store i32 10, i32* [[A]]
-// CK1: call void @__kmpc_kernel_prepare_parallel({{.*}}, i16 1)
+// CK1: call void @__kmpc_kernel_prepare_parallel({{.*}})
 // CK1: call void @__kmpc_begin_sharing_variables(i8*** [[SHAREDARGS1]], i64 1)
 // CK1: [[SHARGSTMP1:%.+]] = load i8**, i8*** [[SHAREDARGS1]]
 // CK1: [[SHARGSTMP2:%.+]] = getelementptr inbounds i8*, i8** [[SHARGSTMP1]], i64 0
@@ -65,7 +65,7 @@
 // CK1: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
 // CK1: call void @__kmpc_end_sharing_variables()
 // CK1: store i32 100, i32* [[B]]
-// CK1: call void @__kmpc_kernel_prepare_parallel({{.*}}, i16 1)
+// CK1: call void @__kmpc_kernel_prepare_parallel({{.*}})
 // CK1: call void @__kmpc_begin_sharing_variables(i8*** [[SHAREDARGS2]], i64 2)
 // CK1: [[SHARGSTMP3:%.+]] = load i8**, i8*** [[SHAREDARGS2]]
 // CK1: [[SHARGSTMP4:%.+]] = getelementptr inbounds i8*, i8** [[SHARGSTMP3]], i64 0
diff --git a/clang/test/OpenMP/nvptx_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_codegen.cpp
--- a/clang/test/OpenMP/nvptx_parallel_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_parallel_codegen.cpp
@@ -92,7 +92,7 @@
 //
 // CHECK: [[AWAIT_WORK]]
 // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) #[[#CONVERGENT:]]
-// CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]]
+// CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]])
 // CHECK: [[KPRB:%.+]] = zext i1 [[KPR]] to i8
 // store i8 [[KPRB]], i8* [[OMP_EXEC_STATUS]], align 1
 // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]],
@@ -166,13 +166,13 @@
 // CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
 // CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]]
 // CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]]
-// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[PARALLEL_FN1]]_wrapper to i8*),
+// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[PARALLEL_FN1]]_wrapper to i8*))
 // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
 // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
 // CHECK: call void @__kmpc_serialized_parallel(
 // CHECK: {{call|invoke}} void [[PARALLEL_FN3:@.+]](
 // CHECK: call void @__kmpc_end_serialized_parallel(
-// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[PARALLEL_FN2]]_wrapper to i8*),
+// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[PARALLEL_FN2]]_wrapper to i8*))
 // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
 // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
 // CHECK-64-DAG: load i32, i32* [[REF_A]]
@@ -211,7 +211,7 @@
 //
 // CHECK: [[AWAIT_WORK]]
 // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
-// CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]],
+// CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]])
 // CHECK: [[KPRB:%.+]] = zext i1 [[KPR]] to i8
 // store i8 [[KPRB]], i8* [[OMP_EXEC_STATUS]], align 1
 // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]],
@@ -291,7 +291,7 @@
 // CHECK: br i1 [[CMP]], label {{%?}}[[IF_THEN:.+]], label {{%?}}[[IF_ELSE:.+]]
 //
 // CHECK: [[IF_THEN]]
-// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[PARALLEL_FN4]]_wrapper to i8*),
+// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[PARALLEL_FN4]]_wrapper to i8*))
 // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
 // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
 // CHECK: br label {{%?}}[[IF_END:.+]]
diff --git a/clang/test/OpenMP/nvptx_target_codegen.cpp b/clang/test/OpenMP/nvptx_target_codegen.cpp
--- a/clang/test/OpenMP/nvptx_target_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_codegen.cpp
@@ -612,7 +612,7 @@
 // CHECK: call void @__kmpc_end_serialized_parallel(%struct.ident_t* [[UNKNOWN]], i32 [[GTID]])
 // CHECK: br label
 
-// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* @{{.+}} to i8*), i16 1)
+// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* @{{.+}} to i8*))
 // CHECK: call void @__kmpc_begin_sharing_variables(i8*** [[SHARED_PTR:%.+]], i{{64|32}} 2)
 // CHECK: [[SHARED:%.+]] = load i8**, i8*** [[SHARED_PTR]],
 // CHECK: [[REF:%.+]] = getelementptr inbounds i8*, i8** [[SHARED]], i{{64|32}} 0
diff --git a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp
--- a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp
@@ -68,7 +68,7 @@
   //
   // CHECK: [[AWAIT_WORK]]
   // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
-  // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]], i16 1)
+  // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]])
   // CHECK: [[KPRB:%.+]] = zext i1 [[KPR]] to i8
   // store i8 [[KPRB]], i8* [[OMP_EXEC_STATUS]], align 1
   // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]],
@@ -154,7 +154,7 @@
   //
   // CHECK: [[AWAIT_WORK]]
   // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
-  // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]], i16 1)
+  // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]])
   // CHECK: [[KPRB:%.+]] = zext i1 [[KPR]] to i8
   // store i8 [[KPRB]], i8* [[OMP_EXEC_STATUS]], align 1
   // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]],
diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp
--- a/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp
@@ -88,7 +88,7 @@
   // CHECK: [[I_ADDR:%.+]] = getelementptr inbounds [[GLOB_TY]], [[GLOB_TY]]* [[RD]], i32 0, i32 0
   //
   // CHECK: call void @__kmpc_for_static_init_4(
-  // CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* @{{.+}} to i8*), i16 1)
+  // CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* @{{.+}} to i8*))
   // CHECK: call void @__kmpc_begin_sharing_variables(i8*** [[SHARED_VARS_PTR:%.+]], i{{64|32}} 1)
   // CHECK: [[SHARED_VARS_BUF:%.+]] = load i8**, i8*** [[SHARED_VARS_PTR]],
   // CHECK: [[VARS_BUF:%.+]] = getelementptr inbounds i8*, i8** [[SHARED_VARS_BUF]], i{{64|32}} 0
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -584,6 +584,11 @@
 __OMP_RTL(__kmpc_task_allow_completion_event, false, VoidPtr, IdentPtr,
           /* Int */ Int32, /* kmp_task_t */ VoidPtr)
 
+/// Note that device runtime functions (in the following) do not necessarily
+/// need attributes as we expect to see the definitions.
+__OMP_RTL(__kmpc_kernel_parallel, false, Int1, VoidPtrPtr)
+__OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr)
+
 __OMP_RTL(__last, false, Void, )
 
 #undef __OMP_RTL
diff --git a/openmp/libomptarget/deviceRTLs/common/src/parallel.cu b/openmp/libomptarget/deviceRTLs/common/src/parallel.cu
--- a/openmp/libomptarget/deviceRTLs/common/src/parallel.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/parallel.cu
@@ -72,10 +72,8 @@
 }
 
 // This routine is always called by the team master..
-EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn,
-                                           int16_t IsOMPRuntimeInitialized) {
+EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn) {
   PRINT0(LD_IO, "call to __kmpc_kernel_prepare_parallel\n");
-  ASSERT0(LT_FUSSY, IsOMPRuntimeInitialized, "Expected initialized runtime.");
 
   omptarget_nvptx_workFn = WorkFn;
 
@@ -120,12 +118,9 @@
 // returns True if this thread is active, else False.
 //
 // Only the worker threads call this routine.
-EXTERN bool __kmpc_kernel_parallel(void **WorkFn,
-                                   int16_t IsOMPRuntimeInitialized) {
+EXTERN bool __kmpc_kernel_parallel(void **WorkFn) {
   PRINT0(LD_IO | LD_PAR, "call to __kmpc_kernel_parallel\n");
 
-  ASSERT0(LT_FUSSY, IsOMPRuntimeInitialized, "Expected initialized runtime.");
-
   // Work function and arguments for L1 parallel region.
   *WorkFn = omptarget_nvptx_workFn;
 
diff --git a/openmp/libomptarget/deviceRTLs/interface.h b/openmp/libomptarget/deviceRTLs/interface.h
--- a/openmp/libomptarget/deviceRTLs/interface.h
+++ b/openmp/libomptarget/deviceRTLs/interface.h
@@ -424,10 +424,8 @@
 EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit,
                                     int16_t RequiresOMPRuntime,
                                     int16_t RequiresDataSharing);
 EXTERN void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime);
-EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn,
-                                           int16_t IsOMPRuntimeInitialized);
-EXTERN bool __kmpc_kernel_parallel(void **WorkFn,
-                                   int16_t IsOMPRuntimeInitialized);
+EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn);
+EXTERN bool __kmpc_kernel_parallel(void **WorkFn);
 EXTERN void __kmpc_kernel_end_parallel();
 EXTERN void __kmpc_data_sharing_init_stack();
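
For reference, the net effect of the signature change on callers: the team master passes only the outlined wrapper to __kmpc_kernel_prepare_parallel, and each worker retrieves it through __kmpc_kernel_parallel without the extra i16 flag. Below is a minimal sketch (not part of the patch) of the worker loop whose shape the CHECK lines above verify; WorkerLoop and ParallelWrapperFnTy are illustrative names only, and the ident_t barrier argument is reduced to void* for brevity.

// Illustrative sketch only -- mirrors the generated worker state machine,
// using the simplified declarations from deviceRTLs/interface.h.
#include <cstdint>

extern "C" {
void __kmpc_kernel_prepare_parallel(void *WorkFn); // new single-argument form
bool __kmpc_kernel_parallel(void **WorkFn);        // new single-argument form
void __kmpc_kernel_end_parallel();
void __kmpc_barrier_simple_spmd(void *Ident, int32_t ThreadId);
}

// Outlined parallel wrappers are emitted by clang with this shape
// (see the "void (i16, i32)*" casts in the tests above).
using ParallelWrapperFnTy = void (*)(int16_t, int32_t);

void WorkerLoop(int32_t ThreadId) {
  void *WorkFn = nullptr;
  for (;;) {
    // Wait until the team master has published the next parallel region.
    __kmpc_barrier_simple_spmd(/*Ident=*/nullptr, ThreadId);
    // The IsOMPRuntimeInitialized argument is gone; the runtime is assumed
    // to be initialized by the time workers reach this point.
    bool IsActive = __kmpc_kernel_parallel(&WorkFn);
    if (!WorkFn)
      return; // Master signalled kernel termination.
    if (IsActive) {
      reinterpret_cast<ParallelWrapperFnTy>(WorkFn)(/*unused=*/0, ThreadId);
      __kmpc_kernel_end_parallel();
    }
    // Rejoin the master before waiting for the next region.
    __kmpc_barrier_simple_spmd(/*Ident=*/nullptr, ThreadId);
  }
}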