diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h @@ -33,6 +33,10 @@ /// Unknown execution mode (orphaned directive). EM_Unknown, }; + + /// An OpenMP-IR-Builder instance. + llvm::OpenMPIRBuilder OMPBuilder; + private: /// Parallel outlined function work for workers to execute. llvm::SmallVector Work; diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -28,96 +28,6 @@ using namespace llvm::omp; namespace { -enum OpenMPRTLFunctionNVPTX { - /// Call to void __kmpc_kernel_init(kmp_int32 thread_limit, - /// int16_t RequiresOMPRuntime); - OMPRTL_NVPTX__kmpc_kernel_init, - /// Call to void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized); - OMPRTL_NVPTX__kmpc_kernel_deinit, - /// Call to void __kmpc_spmd_kernel_init(kmp_int32 thread_limit, - /// int16_t RequiresOMPRuntime, int16_t RequiresDataSharing); - OMPRTL_NVPTX__kmpc_spmd_kernel_init, - /// Call to void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime); - OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2, - /// Call to void __kmpc_kernel_prepare_parallel(void - /// *outlined_function); - OMPRTL_NVPTX__kmpc_kernel_prepare_parallel, - /// Call to bool __kmpc_kernel_parallel(void **outlined_function); - OMPRTL_NVPTX__kmpc_kernel_parallel, - /// Call to void __kmpc_kernel_end_parallel(); - OMPRTL_NVPTX__kmpc_kernel_end_parallel, - /// Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 - /// global_tid); - OMPRTL_NVPTX__kmpc_serialized_parallel, - /// Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 - /// global_tid); - OMPRTL_NVPTX__kmpc_end_serialized_parallel, - /// Call to int32_t __kmpc_shuffle_int32(int32_t element, - /// int16_t lane_offset, int16_t warp_size); - OMPRTL_NVPTX__kmpc_shuffle_int32, - /// Call to int64_t __kmpc_shuffle_int64(int64_t element, - /// int16_t lane_offset, int16_t warp_size); - OMPRTL_NVPTX__kmpc_shuffle_int64, - /// Call to __kmpc_nvptx_parallel_reduce_nowait_v2(ident_t *loc, kmp_int32 - /// global_tid, kmp_int32 num_vars, size_t reduce_size, void* reduce_data, - /// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t - /// lane_offset, int16_t shortCircuit), - /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num)); - OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2, - /// Call to __kmpc_nvptx_teams_reduce_nowait_v2(ident_t *loc, kmp_int32 - /// global_tid, void *global_buffer, int32_t num_of_records, void* - /// reduce_data, - /// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t - /// lane_offset, int16_t shortCircuit), - /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num), void - /// (*kmp_ListToGlobalCpyFctPtr)(void *buffer, int idx, void *reduce_data), - /// void (*kmp_GlobalToListCpyFctPtr)(void *buffer, int idx, - /// void *reduce_data), void (*kmp_GlobalToListCpyPtrsFctPtr)(void *buffer, - /// int idx, void *reduce_data), void (*kmp_GlobalToListRedFctPtr)(void - /// *buffer, int idx, void *reduce_data)); - OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2, - /// Call to __kmpc_nvptx_end_reduce_nowait(int32_t global_tid); - OMPRTL_NVPTX__kmpc_end_reduce_nowait, - /// Call to void __kmpc_data_sharing_init_stack(); - OMPRTL_NVPTX__kmpc_data_sharing_init_stack, - /// Call to void __kmpc_data_sharing_init_stack_spmd(); - OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd, - /// Call to void* __kmpc_data_sharing_coalesced_push_stack(size_t size, - /// int16_t UseSharedMemory); - OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack, - /// Call to void* __kmpc_data_sharing_push_stack(size_t size, int16_t - /// UseSharedMemory); - OMPRTL_NVPTX__kmpc_data_sharing_push_stack, - /// Call to void __kmpc_data_sharing_pop_stack(void *a); - OMPRTL_NVPTX__kmpc_data_sharing_pop_stack, - /// Call to void __kmpc_begin_sharing_variables(void ***args, - /// size_t n_args); - OMPRTL_NVPTX__kmpc_begin_sharing_variables, - /// Call to void __kmpc_end_sharing_variables(); - OMPRTL_NVPTX__kmpc_end_sharing_variables, - /// Call to void __kmpc_get_shared_variables(void ***GlobalArgs) - OMPRTL_NVPTX__kmpc_get_shared_variables, - /// Call to uint16_t __kmpc_parallel_level(ident_t *loc, kmp_int32 - /// global_tid); - OMPRTL_NVPTX__kmpc_parallel_level, - /// Call to int8_t __kmpc_is_spmd_exec_mode(); - OMPRTL_NVPTX__kmpc_is_spmd_exec_mode, - /// Call to void __kmpc_get_team_static_memory(int16_t isSPMDExecutionMode, - /// const void *buf, size_t size, int16_t is_shared, const void **res); - OMPRTL_NVPTX__kmpc_get_team_static_memory, - /// Call to void __kmpc_restore_team_static_memory(int16_t - /// isSPMDExecutionMode, int16_t is_shared); - OMPRTL_NVPTX__kmpc_restore_team_static_memory, - /// Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); - OMPRTL__kmpc_barrier, - /// Call to void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32 - /// global_tid); - OMPRTL__kmpc_barrier_simple_spmd, - /// Call to int32_t __kmpc_warp_active_thread_mask(void); - OMPRTL_NVPTX__kmpc_warp_active_thread_mask, - /// Call to void __kmpc_syncwarp(int32_t Mask); - OMPRTL_NVPTX__kmpc_syncwarp, -}; /// Pre(post)-action for different OpenMP constructs specialized for NVPTX. class NVPTXActionTy final : public PrePostActionTy { @@ -1243,13 +1153,13 @@ // TODO: Optimize runtime initialization and pass in correct value. llvm::Value *Args[] = {getThreadLimit(CGF), Bld.getInt16(/*RequiresOMPRuntime=*/1)}; - CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_init), Args); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_kernel_init), + Args); // For data sharing, we need to initialize the stack. - CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_data_sharing_init_stack)); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_data_sharing_init_stack)); emitGenericVarsProlog(CGF, WST.Loc); } @@ -1272,8 +1182,9 @@ // Signal termination condition. // TODO: Optimize runtime initialization and pass in correct value. llvm::Value *Args[] = {CGF.Builder.getInt16(/*IsOMPRuntimeInitialized=*/1)}; - CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_deinit), Args); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_kernel_deinit), + Args); // Barrier to terminate worker threads. syncCTAThreads(CGF); // Master thread jumps to exit point. @@ -1347,13 +1258,14 @@ /*RequiresOMPRuntime=*/ Bld.getInt16(RequiresFullRuntime ? 1 : 0), /*RequiresDataSharing=*/Bld.getInt16(0)}; - CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_init), Args); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_spmd_kernel_init), + Args); if (RequiresFullRuntime) { // For data sharing, we need to initialize the stack. - CGF.EmitRuntimeCall(createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd)); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_data_sharing_init_stack_spmd)); } CGF.EmitBranch(ExecuteBB); @@ -1379,9 +1291,9 @@ // DeInitialize the OMP state in the runtime; called by all active threads. llvm::Value *Args[] = {/*RequiresOMPRuntime=*/ CGF.Builder.getInt16(RequiresFullRuntime ? 1 : 0)}; - CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2), Args); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_spmd_kernel_deinit_v2), + Args); CGF.EmitBranch(EST.ExitBB); CGF.EmitBlock(EST.ExitBB); @@ -1415,7 +1327,7 @@ } void CGOpenMPRuntimeGPU::emitWorkerLoop(CodeGenFunction &CGF, - WorkerFunctionState &WST) { + WorkerFunctionState &WST) { // // The workers enter this loop and wait for parallel work from the master. // When the master encounters a parallel region it sets up the work + variable @@ -1450,8 +1362,10 @@ // TODO: Optimize runtime initialization and pass in correct value. llvm::Value *Args[] = {WorkFn.getPointer()}; - llvm::Value *Ret = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_parallel), Args); + llvm::Value *Ret = + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_kernel_parallel), + Args); Bld.CreateStore(Bld.CreateZExt(Ret, CGF.Int8Ty), ExecStatus); // On termination condition (workid == 0), exit loop. @@ -1516,9 +1430,9 @@ // Signal end of parallel region. CGF.EmitBlock(TerminateBB); - CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_end_parallel), - llvm::None); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_kernel_end_parallel), + llvm::None); CGF.EmitBranch(BarrierBB); // All active and inactive workers wait at a barrier after parallel region. @@ -1533,328 +1447,6 @@ clearLocThreadIdInsertPt(CGF); } -/// Returns specified OpenMP runtime function for the current OpenMP -/// implementation. Specialized for the NVPTX device. -/// \param Function OpenMP runtime function. -/// \return Specified function. -llvm::FunctionCallee -CGOpenMPRuntimeGPU::createNVPTXRuntimeFunction(unsigned Function) { - llvm::FunctionCallee RTLFn = nullptr; - switch (static_cast(Function)) { - case OMPRTL_NVPTX__kmpc_kernel_init: { - // Build void __kmpc_kernel_init(kmp_int32 thread_limit, int16_t - // RequiresOMPRuntime); - llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_init"); - break; - } - case OMPRTL_NVPTX__kmpc_kernel_deinit: { - // Build void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized); - llvm::Type *TypeParams[] = {CGM.Int16Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_deinit"); - break; - } - case OMPRTL_NVPTX__kmpc_spmd_kernel_init: { - // Build void __kmpc_spmd_kernel_init(kmp_int32 thread_limit, - // int16_t RequiresOMPRuntime, int16_t RequiresDataSharing); - llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_init"); - break; - } - case OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2: { - // Build void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime); - llvm::Type *TypeParams[] = {CGM.Int16Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_deinit_v2"); - break; - } - case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: { - /// Build void __kmpc_kernel_prepare_parallel( - /// void *outlined_function); - llvm::Type *TypeParams[] = {CGM.Int8PtrTy}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_prepare_parallel"); - break; - } - case OMPRTL_NVPTX__kmpc_kernel_parallel: { - /// Build bool __kmpc_kernel_parallel(void **outlined_function); - llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy}; - llvm::Type *RetTy = CGM.getTypes().ConvertType(CGM.getContext().BoolTy); - auto *FnTy = - llvm::FunctionType::get(RetTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_parallel"); - break; - } - case OMPRTL_NVPTX__kmpc_kernel_end_parallel: { - /// Build void __kmpc_kernel_end_parallel(); - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_end_parallel"); - break; - } - case OMPRTL_NVPTX__kmpc_serialized_parallel: { - // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 - // global_tid); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); - break; - } - case OMPRTL_NVPTX__kmpc_end_serialized_parallel: { - // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 - // global_tid); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); - break; - } - case OMPRTL_NVPTX__kmpc_shuffle_int32: { - // Build int32_t __kmpc_shuffle_int32(int32_t element, - // int16_t lane_offset, int16_t warp_size); - llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int32"); - break; - } - case OMPRTL_NVPTX__kmpc_shuffle_int64: { - // Build int64_t __kmpc_shuffle_int64(int64_t element, - // int16_t lane_offset, int16_t warp_size); - llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int16Ty, CGM.Int16Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int64"); - break; - } - case OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2: { - // Build int32_t kmpc_nvptx_parallel_reduce_nowait_v2(ident_t *loc, - // kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, void* - // reduce_data, void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t - // lane_id, int16_t lane_offset, int16_t Algorithm Version), void - // (*kmp_InterWarpCopyFctPtr)(void* src, int warp_num)); - llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty, - CGM.Int16Ty, CGM.Int16Ty}; - auto *ShuffleReduceFnTy = - llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams, - /*isVarArg=*/false); - llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty}; - auto *InterWarpCopyFnTy = - llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams, - /*isVarArg=*/false); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), - CGM.Int32Ty, - CGM.Int32Ty, - CGM.SizeTy, - CGM.VoidPtrTy, - ShuffleReduceFnTy->getPointerTo(), - InterWarpCopyFnTy->getPointerTo()}; - auto *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction( - FnTy, /*Name=*/"__kmpc_nvptx_parallel_reduce_nowait_v2"); - break; - } - case OMPRTL_NVPTX__kmpc_end_reduce_nowait: { - // Build __kmpc_end_reduce_nowait(kmp_int32 global_tid); - llvm::Type *TypeParams[] = {CGM.Int32Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction( - FnTy, /*Name=*/"__kmpc_nvptx_end_reduce_nowait"); - break; - } - case OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2: { - // Build int32_t __kmpc_nvptx_teams_reduce_nowait_v2(ident_t *loc, kmp_int32 - // global_tid, void *global_buffer, int32_t num_of_records, void* - // reduce_data, - // void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t - // lane_offset, int16_t shortCircuit), - // void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num), void - // (*kmp_ListToGlobalCpyFctPtr)(void *buffer, int idx, void *reduce_data), - // void (*kmp_GlobalToListCpyFctPtr)(void *buffer, int idx, - // void *reduce_data), void (*kmp_GlobalToListCpyPtrsFctPtr)(void *buffer, - // int idx, void *reduce_data), void (*kmp_GlobalToListRedFctPtr)(void - // *buffer, int idx, void *reduce_data)); - llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty, - CGM.Int16Ty, CGM.Int16Ty}; - auto *ShuffleReduceFnTy = - llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams, - /*isVarArg=*/false); - llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty}; - auto *InterWarpCopyFnTy = - llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams, - /*isVarArg=*/false); - llvm::Type *GlobalListTypeParams[] = {CGM.VoidPtrTy, CGM.IntTy, - CGM.VoidPtrTy}; - auto *GlobalListFnTy = - llvm::FunctionType::get(CGM.VoidTy, GlobalListTypeParams, - /*isVarArg=*/false); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), - CGM.Int32Ty, - CGM.VoidPtrTy, - CGM.Int32Ty, - CGM.VoidPtrTy, - ShuffleReduceFnTy->getPointerTo(), - InterWarpCopyFnTy->getPointerTo(), - GlobalListFnTy->getPointerTo(), - GlobalListFnTy->getPointerTo(), - GlobalListFnTy->getPointerTo(), - GlobalListFnTy->getPointerTo()}; - auto *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction( - FnTy, /*Name=*/"__kmpc_nvptx_teams_reduce_nowait_v2"); - break; - } - case OMPRTL_NVPTX__kmpc_data_sharing_init_stack: { - /// Build void __kmpc_data_sharing_init_stack(); - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack"); - break; - } - case OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd: { - /// Build void __kmpc_data_sharing_init_stack_spmd(); - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false); - RTLFn = - CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack_spmd"); - break; - } - case OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack: { - // Build void *__kmpc_data_sharing_coalesced_push_stack(size_t size, - // int16_t UseSharedMemory); - llvm::Type *TypeParams[] = {CGM.SizeTy, CGM.Int16Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction( - FnTy, /*Name=*/"__kmpc_data_sharing_coalesced_push_stack"); - break; - } - case OMPRTL_NVPTX__kmpc_data_sharing_push_stack: { - // Build void *__kmpc_data_sharing_push_stack(size_t size, int16_t - // UseSharedMemory); - llvm::Type *TypeParams[] = {CGM.SizeTy, CGM.Int16Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction( - FnTy, /*Name=*/"__kmpc_data_sharing_push_stack"); - break; - } - case OMPRTL_NVPTX__kmpc_data_sharing_pop_stack: { - // Build void __kmpc_data_sharing_pop_stack(void *a); - llvm::Type *TypeParams[] = {CGM.VoidPtrTy}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, - /*Name=*/"__kmpc_data_sharing_pop_stack"); - break; - } - case OMPRTL_NVPTX__kmpc_begin_sharing_variables: { - /// Build void __kmpc_begin_sharing_variables(void ***args, - /// size_t n_args); - llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy->getPointerTo(), CGM.SizeTy}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_begin_sharing_variables"); - break; - } - case OMPRTL_NVPTX__kmpc_end_sharing_variables: { - /// Build void __kmpc_end_sharing_variables(); - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_sharing_variables"); - break; - } - case OMPRTL_NVPTX__kmpc_get_shared_variables: { - /// Build void __kmpc_get_shared_variables(void ***GlobalArgs); - llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy->getPointerTo()}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_get_shared_variables"); - break; - } - case OMPRTL_NVPTX__kmpc_parallel_level: { - // Build uint16_t __kmpc_parallel_level(ident_t *loc, kmp_int32 global_tid); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.Int16Ty, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_parallel_level"); - break; - } - case OMPRTL_NVPTX__kmpc_is_spmd_exec_mode: { - // Build int8_t __kmpc_is_spmd_exec_mode(); - auto *FnTy = llvm::FunctionType::get(CGM.Int8Ty, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_is_spmd_exec_mode"); - break; - } - case OMPRTL_NVPTX__kmpc_get_team_static_memory: { - // Build void __kmpc_get_team_static_memory(int16_t isSPMDExecutionMode, - // const void *buf, size_t size, int16_t is_shared, const void **res); - llvm::Type *TypeParams[] = {CGM.Int16Ty, CGM.VoidPtrTy, CGM.SizeTy, - CGM.Int16Ty, CGM.VoidPtrPtrTy}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_get_team_static_memory"); - break; - } - case OMPRTL_NVPTX__kmpc_restore_team_static_memory: { - // Build void __kmpc_restore_team_static_memory(int16_t isSPMDExecutionMode, - // int16_t is_shared); - llvm::Type *TypeParams[] = {CGM.Int16Ty, CGM.Int16Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); - RTLFn = - CGM.CreateRuntimeFunction(FnTy, "__kmpc_restore_team_static_memory"); - break; - } - case OMPRTL__kmpc_barrier: { - // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = - CGM.CreateConvergentRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); - break; - } - case OMPRTL__kmpc_barrier_simple_spmd: { - // Build void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32 - // global_tid); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateConvergentRuntimeFunction( - FnTy, /*Name*/ "__kmpc_barrier_simple_spmd"); - break; - } - case OMPRTL_NVPTX__kmpc_warp_active_thread_mask: { - // Build int32_t __kmpc_warp_active_thread_mask(void); - auto *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, llvm::None, /*isVarArg=*/false); - RTLFn = CGM.CreateConvergentRuntimeFunction(FnTy, "__kmpc_warp_active_thread_mask"); - break; - } - case OMPRTL_NVPTX__kmpc_syncwarp: { - // Build void __kmpc_syncwarp(kmp_int32 Mask); - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, CGM.Int32Ty, /*isVarArg=*/false); - RTLFn = CGM.CreateConvergentRuntimeFunction(FnTy, "__kmpc_syncwarp"); - break; - } - } - return RTLFn; -} - void CGOpenMPRuntimeGPU::createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t, @@ -1929,9 +1521,12 @@ } CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM) - : CGOpenMPRuntime(CGM, "_", "$") { + : CGOpenMPRuntime(CGM, "_", "$"), OMPBuilder(CGM.getModule()) { if (!CGM.getLangOpts().OpenMPIsDevice) llvm_unreachable("OpenMP NVPTX can only handle device code."); + + // Initialize Types used in OpenMPIRBuilder from OMPKinds.def + OMPBuilder.initialize(); } void CGOpenMPRuntimeGPU::emitProcBindClause(CodeGenFunction &CGF, @@ -2157,12 +1752,14 @@ llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); llvm::Value *ThreadID = getThreadID(CGF, Loc); llvm::Value *PL = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level), + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), + OMPRTL___kmpc_parallel_level), {RTLoc, ThreadID}); IsTTD = Bld.CreateIsNull(PL); } - llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode))); + llvm::Value *IsSPMD = Bld.CreateIsNotNull( + CGF.EmitNounwindRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_is_spmd_exec_mode))); Bld.CreateCondBr(IsSPMD, SPMDBB, NonSPMDBB); // There is no need to emit line number for unconditional branch. (void)ApplyDebugLocation::CreateEmpty(CGF); @@ -2196,8 +1793,8 @@ llvm::Value *GlobalRecordSizeArg[] = { Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)}; llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_data_sharing_coalesced_push_stack), GlobalRecordSizeArg); GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( GlobalRecValue, GlobalRecPtrTy); @@ -2259,9 +1856,10 @@ CGM.Int16Ty, getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD ? 1 : 0), StaticGlobalized, Ld, IsInSharedMemory, ResAddr}; - CGF.EmitRuntimeCall(createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_get_team_static_memory), - GlobalRecordSizeArg); + CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_get_team_static_memory), + GlobalRecordSizeArg); GlobalizedRecords.back().Buffer = StaticGlobalized; GlobalizedRecords.back().RecSize = RecSize; GlobalizedRecords.back().UseSharedMemory = UseSharedMemory; @@ -2288,10 +1886,10 @@ llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize), CGF.Builder.getInt16(UseSharedMemory ? 1 : 0)}; llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction( - IsInTTDRegion - ? OMPRTL_NVPTX__kmpc_data_sharing_push_stack - : OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), + IsInTTDRegion ? OMPRTL___kmpc_data_sharing_push_stack + : OMPRTL___kmpc_data_sharing_coalesced_push_stack), GlobalRecordSizeArg); GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( GlobalRecValue, GlobalRecPtrTy); @@ -2390,8 +1988,8 @@ llvm::Value *GlobalRecordSizeArg[] = { Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)}; llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_data_sharing_coalesced_push_stack), GlobalRecordSizeArg); llvm::Value *GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( GlobalRecValue, CGF.ConvertTypeForMem(VD->getType())->getPointerTo()); @@ -2419,7 +2017,8 @@ for (llvm::Value *Addr : llvm::reverse(I->getSecond().EscapedVariableLengthDeclsAddrs)) { CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_pop_stack), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_data_sharing_pop_stack), Addr); } if (I->getSecond().GlobalRecordAddr) { @@ -2434,8 +2033,8 @@ (void)ApplyDebugLocation::CreateEmpty(CGF); CGF.EmitBlock(NonSPMDBB); CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_data_sharing_pop_stack), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_data_sharing_pop_stack), CGF.EmitCastToVoidPtr(I->getSecond().GlobalRecordAddr)); CGF.EmitBlock(ExitBB); } else if (!CGM.getLangOpts().OpenMPCUDATargetParallel && IsInTTDRegion) { @@ -2456,14 +2055,15 @@ getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD ? 1 : 0), IsInSharedMemory}; CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_restore_team_static_memory), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_restore_team_static_memory), Args); } } else { - CGF.EmitRuntimeCall(createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_data_sharing_pop_stack), - I->getSecond().GlobalRecordAddr); + CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_data_sharing_pop_stack), + I->getSecond().GlobalRecordAddr); } } } @@ -2535,9 +2135,11 @@ llvm::Value *Args[] = {RTLoc, ThreadID}; NVPTXActionTy Action( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_serialized_parallel), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_serialized_parallel), Args, - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_serialized_parallel), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_end_serialized_parallel), Args); RCG.setAction(Action); RCG(CGF); @@ -2553,7 +2155,8 @@ // Prepare for parallel region. Indicate the outlined function. llvm::Value *Args[] = {ID}; CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_kernel_prepare_parallel), Args); // Create a private scope that will globalize the arguments @@ -2570,9 +2173,10 @@ llvm::Value *DataSharingArgs[] = { SharedArgsPtr, llvm::ConstantInt::get(CGM.SizeTy, CapturedVars.size())}; - CGF.EmitRuntimeCall(createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_begin_sharing_variables), - DataSharingArgs); + CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_begin_sharing_variables), + DataSharingArgs); // Store variable address in a list of references to pass to workers. unsigned Idx = 0; @@ -2606,8 +2210,8 @@ syncCTAThreads(CGF); if (!CapturedVars.empty()) - CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_sharing_variables)); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_end_sharing_variables)); // Remember for post-processing in worker loop. Work.emplace_back(WFn); @@ -2631,8 +2235,9 @@ llvm::BasicBlock *SeqBB = CGF.createBasicBlock(".sequential"); llvm::BasicBlock *ParallelCheckBB = CGF.createBasicBlock(".parcheck"); llvm::BasicBlock *MasterBB = CGF.createBasicBlock(".master"); - llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode))); + llvm::Value *IsSPMD = Bld.CreateIsNotNull( + CGF.EmitNounwindRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_is_spmd_exec_mode))); Bld.CreateCondBr(IsSPMD, SeqBB, ParallelCheckBB); // There is no need to emit line number for unconditional branch. (void)ApplyDebugLocation::CreateEmpty(CGF); @@ -2640,7 +2245,8 @@ llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); llvm::Value *ThreadID = getThreadID(CGF, Loc); llvm::Value *PL = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level), + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), + OMPRTL___kmpc_parallel_level), {RTLoc, ThreadID}); llvm::Value *Res = Bld.CreateIsNotNull(PL); Bld.CreateCondBr(Res, SeqBB, MasterBB); @@ -2704,9 +2310,11 @@ llvm::Value *Args[] = {RTLoc, ThreadID}; NVPTXActionTy Action( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_serialized_parallel), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_serialized_parallel), Args, - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_serialized_parallel), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_end_serialized_parallel), Args); RCG.setAction(Action); RCG(CGF); @@ -2737,8 +2345,9 @@ cast(getIdentTyPointerTy())), llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/0, /*isSigned=*/true)}; llvm::CallInst *Call = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier_simple_spmd), Args); - Call->setConvergent(); + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), + OMPRTL___kmpc_barrier_simple_spmd), + Args); } void CGOpenMPRuntimeGPU::emitBarrierCall(CodeGenFunction &CGF, @@ -2752,9 +2361,10 @@ unsigned Flags = getDefaultFlagsForBarriers(Kind); llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), getThreadID(CGF, Loc)}; - llvm::CallInst *Call = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier), Args); - Call->setConvergent(); + llvm::CallInst *Call = + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_barrier), + Args); } void CGOpenMPRuntimeGPU::emitCriticalRegion( @@ -2770,8 +2380,8 @@ auto &RT = static_cast(CGF.CGM.getOpenMPRuntime()); // Get the mask of active threads in the warp. - llvm::Value *Mask = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_warp_active_thread_mask)); + llvm::Value *Mask = CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_warp_active_thread_mask)); // Fetch team-local id of the thread. llvm::Value *ThreadID = RT.getGPUThreadID(CGF); @@ -2813,8 +2423,9 @@ // counter variable and returns to the loop. CGF.EmitBlock(SyncBB); // Reconverge active threads in the warp. - (void)CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_syncwarp), Mask); + (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_syncwarp), + Mask); llvm::Value *IncCounterVal = CGF.Builder.CreateNSWAdd(CounterVal, CGF.Builder.getInt32(1)); @@ -2869,9 +2480,9 @@ assert(Size.getQuantity() <= 8 && "Unsupported bitwidth in shuffle instruction."); - OpenMPRTLFunctionNVPTX ShuffleFn = Size.getQuantity() <= 4 - ? OMPRTL_NVPTX__kmpc_shuffle_int32 - : OMPRTL_NVPTX__kmpc_shuffle_int64; + RuntimeFunction ShuffleFn = Size.getQuantity() <= 4 + ? OMPRTL___kmpc_shuffle_int32 + : OMPRTL___kmpc_shuffle_int64; // Cast all types to 32- or 64-bit values before calling shuffle routines. QualType CastTy = CGF.getContext().getIntTypeForBitwidth( @@ -2881,7 +2492,8 @@ Bld.CreateIntCast(RT.getGPUWarpSize(CGF), CGM.Int16Ty, /*isSigned=*/true); llvm::Value *ShuffledVal = CGF.EmitRuntimeCall( - RT.createNVPTXRuntimeFunction(ShuffleFn), {ElemCast, Offset, WarpSize}); + RT.OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), ShuffleFn), + {ElemCast, Offset, WarpSize}); return castValueToType(CGF, ShuffledVal, CastTy, ElemType, Loc); } @@ -4391,8 +4003,8 @@ InterWarpCopyFn}; Res = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2), Args); } else { assert(TeamsReduction && "expected teams reduction."); @@ -4441,8 +4053,8 @@ BufferToGlobalRedFn}; Res = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2), Args); } @@ -4477,7 +4089,8 @@ RegionCodeGenTy RCG(CodeGen); NVPTXActionTy Action( nullptr, llvm::None, - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_reduce_nowait), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_nvptx_end_reduce_nowait), EndArgs); RCG.setAction(Action); RCG(CGF); @@ -4488,7 +4101,7 @@ const VarDecl * CGOpenMPRuntimeGPU::translateParameter(const FieldDecl *FD, - const VarDecl *NativeParam) const { + const VarDecl *NativeParam) const { if (!NativeParam->getType()->isReferenceType()) return NativeParam; QualType ArgType = NativeParam->getType(); @@ -4638,9 +4251,9 @@ CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrPtrTy, "global_args"); llvm::Value *GlobalArgsPtr = GlobalArgs.getPointer(); llvm::Value *DataSharingArgs[] = {GlobalArgsPtr}; - CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_get_shared_variables), - DataSharingArgs); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_get_shared_variables), + DataSharingArgs); // Retrieve the shared variables from the list of references returned // by the runtime. Pass the variables to the outlined function. diff --git a/clang/test/OpenMP/nvptx_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_codegen.cpp --- a/clang/test/OpenMP/nvptx_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_parallel_codegen.cpp @@ -91,7 +91,7 @@ // CHECK: br label {{%?}}[[AWAIT_WORK:.+]] // // CHECK: [[AWAIT_WORK]] -// CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) #[[#CONVERGENT:]] +// CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]]) // CHECK: [[KPRB:%.+]] = zext i1 [[KPR]] to i8 // store i8 [[KPRB]], i8* [[OMP_EXEC_STATUS]], align 1 @@ -321,10 +321,10 @@ // CHECK: define internal void [[PARALLEL_FN4]]( // CHECK: [[A:%.+]] = alloca i[[SZ:32|64]], // CHECK: store i[[SZ]] 45, i[[SZ]]* %a, -// CHECK: call void @__kmpc_barrier(%struct.ident_t* @{{.+}}, i32 %{{.+}}) #[[#CONVERGENT:]] +// CHECK: call void @__kmpc_barrier(%struct.ident_t* @{{.+}}, i32 %{{.+}}) // CHECK: ret void -// CHECK: declare void @__kmpc_barrier(%struct.ident_t*, i32) #[[#CONVERGENT]] +// CHECK: declare void @__kmpc_barrier(%struct.ident_t*, i32) #[[#CONVERGENT:]] // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l58}}_worker() // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l58}}( @@ -377,6 +377,6 @@ // CHECK: declare i32 @__kmpc_warp_active_thread_mask() #[[#CONVERGENT:]] // CHECK: declare void @__kmpc_syncwarp(i32) #[[#CONVERGENT:]] -// CHECK: attributes #[[#CONVERGENT]] = {{.*}} convergent {{.*}} +// CHECK: attributes #[[#CONVERGENT:]] = {{.*}} convergent {{.*}} #endif diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -220,6 +220,9 @@ __OMP_FUNCTION_TYPE(KmpcCopyCtor, false, VoidPtr, VoidPtr, VoidPtr) __OMP_FUNCTION_TYPE(TaskRoutineEntry, false, Int32, Int32, /* kmp_task_t */ VoidPtr) +__OMP_FUNCTION_TYPE(ShuffleReduce, false, Void, VoidPtr, Int16, Int16, Int16) +__OMP_FUNCTION_TYPE(InterWarpCopy, false, Void, VoidPtr, Int32) +__OMP_FUNCTION_TYPE(GlobalList, false, Void, VoidPtr, Int32, VoidPtr) #undef __OMP_FUNCTION_TYPE #undef OMP_FUNCTION_TYPE @@ -295,8 +298,6 @@ __OMP_RTL(__kmpc_push_num_threads, false, Void, IdentPtr, Int32, /* Int */ Int32) __OMP_RTL(__kmpc_push_proc_bind, false, Void, IdentPtr, Int32, /* Int */ Int32) -__OMP_RTL(__kmpc_serialized_parallel, false, Void, IdentPtr, Int32) -__OMP_RTL(__kmpc_end_serialized_parallel, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_omp_reg_task_with_affinity, false, Int32, IdentPtr, Int32, /* kmp_task_t */ VoidPtr, Int32, /* kmp_task_affinity_info_t */ VoidPtr) @@ -502,17 +503,42 @@ __OMP_RTL(__kmpc_task_allow_completion_event, false, VoidPtr, IdentPtr, /* Int */ Int32, /* kmp_task_t */ VoidPtr) +/// OpenMP Device runtime functions +__OMP_RTL(__kmpc_kernel_init, false, Void, Int32, Int16) +__OMP_RTL(__kmpc_kernel_deinit, false, Void, Int16) +__OMP_RTL(__kmpc_spmd_kernel_init, false, Void, Int32, Int16, Int16) +__OMP_RTL(__kmpc_spmd_kernel_deinit_v2, false, Void, Int16) +__OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr) +__OMP_RTL(__kmpc_kernel_parallel, false, Int1, VoidPtrPtr) +__OMP_RTL(__kmpc_kernel_end_parallel, false, Void, ) +__OMP_RTL(__kmpc_serialized_parallel, false, Void, IdentPtr, Int32) +__OMP_RTL(__kmpc_end_serialized_parallel, false, Void, IdentPtr, Int32) +__OMP_RTL(__kmpc_shuffle_int32, false, Int32, Int32, Int16, Int16) +__OMP_RTL(__kmpc_nvptx_parallel_reduce_nowait_v2, false, Int32, IdentPtr, Int32, + Int32, SizeTy, VoidPtr, ShuffleReducePtr, InterWarpCopyPtr) +__OMP_RTL(__kmpc_nvptx_end_reduce_nowait, false, Void, Int32) +__OMP_RTL(__kmpc_nvptx_teams_reduce_nowait_v2, false, Int32, IdentPtr, Int32, + VoidPtr, Int32, VoidPtr, ShuffleReducePtr, InterWarpCopyPtr, + GlobalListPtr, GlobalListPtr, GlobalListPtr, GlobalListPtr) + +__OMP_RTL(__kmpc_shuffle_int64, false, Int64, Int64, Int16, Int16) __OMP_RTL(__kmpc_data_sharing_init_stack, false, Void, ) -__OMP_RTL(__kmpc_data_sharing_init_stack_spmd, false, Void, ) -__OMP_RTL(__kmpc_data_sharing_coalesced_push_stack, false, VoidPtr, SizeTy, - Int16) +__OMP_RTL(__kmpc_data_sharing_init_stack_spmd, false, Void, ) + +__OMP_RTL(__kmpc_data_sharing_coalesced_push_stack, false, VoidPtr, SizeTy, Int16) __OMP_RTL(__kmpc_data_sharing_push_stack, false, VoidPtr, SizeTy, Int16) __OMP_RTL(__kmpc_data_sharing_pop_stack, false, Void, VoidPtr) - -/// Note that device runtime functions (in the following) do not necessarily -/// need attributes as we expect to see the definitions. -__OMP_RTL(__kmpc_kernel_parallel, false, Int1, VoidPtrPtr) -__OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr) +__OMP_RTL(__kmpc_begin_sharing_variables, false, Void, VoidPtrPtrPtr, SizeTy) +__OMP_RTL(__kmpc_end_sharing_variables, false, Void, ) +__OMP_RTL(__kmpc_get_shared_variables, false, Void, VoidPtrPtrPtr) +__OMP_RTL(__kmpc_parallel_level, false, Int16, IdentPtr, Int32) +__OMP_RTL(__kmpc_is_spmd_exec_mode, false, Int8, ) +__OMP_RTL(__kmpc_get_team_static_memory, false, Void, Int16, VoidPtr, SizeTy, + Int16, VoidPtrPtr) +__OMP_RTL(__kmpc_restore_team_static_memory, false, Void, Int16, Int16) +__OMP_RTL(__kmpc_barrier_simple_spmd, false, Void, IdentPtr, Int32) +__OMP_RTL(__kmpc_warp_active_thread_mask, false, Int32, ) +__OMP_RTL(__kmpc_syncwarp, false, Void, Int32) __OMP_RTL(__last, false, Void, ) @@ -561,8 +587,8 @@ __OMP_ATTRS_SET(BarrierAttrs, OptimisticAttributes - ? AttributeSet(EnumAttr(NoUnwind)) - : AttributeSet(EnumAttr(NoUnwind))) + ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(Convergent)) + : AttributeSet(EnumAttr(NoUnwind), EnumAttr(Convergent))) __OMP_ATTRS_SET(InaccessibleArgOnlyAttrs, OptimisticAttributes @@ -634,6 +660,11 @@ __OMP_RTL_ATTRS(__kmpc_barrier, BarrierAttrs, AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_barrier_simple_spmd, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_warp_active_thread_mask, BarrierAttrs, AttributeSet(), + ParamAttrs()) +__OMP_RTL_ATTRS(__kmpc_syncwarp, BarrierAttrs, AttributeSet(), ParamAttrs()) __OMP_RTL_ATTRS(__kmpc_cancel, InaccessibleArgOnlyAttrs, AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) __OMP_RTL_ATTRS(__kmpc_cancel_barrier, BarrierAttrs, AttributeSet(),