diff --git a/clang/test/OpenMP/nvptx_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_codegen.cpp --- a/clang/test/OpenMP/nvptx_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_parallel_codegen.cpp @@ -485,7 +485,7 @@ // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_warp_active_thread_mask() +// CHECK3-NEXT: [[TMP1:%.*]] = call i64 @__kmpc_warp_active_thread_mask() // CHECK3-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() // CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK3-NEXT: store i32 0, i32* [[CRITICAL_COUNTER]], align 4 @@ -508,7 +508,7 @@ // CHECK3-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], [8 x i32]* @"_gomp_critical_user_$var") // CHECK3-NEXT: br label [[OMP_CRITICAL_SYNC]] // CHECK3: omp.critical.sync: -// CHECK3-NEXT: call void @__kmpc_syncwarp(i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_syncwarp(i64 [[TMP1]]) // CHECK3-NEXT: [[TMP9:%.*]] = add nsw i32 [[TMP4]], 1 // CHECK3-NEXT: store i32 [[TMP9]], i32* [[CRITICAL_COUNTER]], align 4 // CHECK3-NEXT: br label [[OMP_CRITICAL_LOOP]] @@ -938,7 +938,7 @@ // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK4-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_warp_active_thread_mask() +// CHECK4-NEXT: [[TMP1:%.*]] = call i64 @__kmpc_warp_active_thread_mask() // CHECK4-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() // CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK4-NEXT: store i32 0, i32* [[CRITICAL_COUNTER]], align 4 @@ -961,7 +961,7 @@ // CHECK4-NEXT: call void 
@__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], [8 x i32]* @"_gomp_critical_user_$var") // CHECK4-NEXT: br label [[OMP_CRITICAL_SYNC]] // CHECK4: omp.critical.sync: -// CHECK4-NEXT: call void @__kmpc_syncwarp(i32 [[TMP1]]) +// CHECK4-NEXT: call void @__kmpc_syncwarp(i64 [[TMP1]]) // CHECK4-NEXT: [[TMP9:%.*]] = add nsw i32 [[TMP4]], 1 // CHECK4-NEXT: store i32 [[TMP9]], i32* [[CRITICAL_COUNTER]], align 4 // CHECK4-NEXT: br label [[OMP_CRITICAL_LOOP]] @@ -1391,7 +1391,7 @@ // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK5-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_warp_active_thread_mask() +// CHECK5-NEXT: [[TMP1:%.*]] = call i64 @__kmpc_warp_active_thread_mask() // CHECK5-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() // CHECK5-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK5-NEXT: store i32 0, i32* [[CRITICAL_COUNTER]], align 4 @@ -1414,7 +1414,7 @@ // CHECK5-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], [8 x i32]* @"_gomp_critical_user_$var") // CHECK5-NEXT: br label [[OMP_CRITICAL_SYNC]] // CHECK5: omp.critical.sync: -// CHECK5-NEXT: call void @__kmpc_syncwarp(i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_syncwarp(i64 [[TMP1]]) // CHECK5-NEXT: [[TMP9:%.*]] = add nsw i32 [[TMP4]], 1 // CHECK5-NEXT: store i32 [[TMP9]], i32* [[CRITICAL_COUNTER]], align 4 // CHECK5-NEXT: br label [[OMP_CRITICAL_LOOP]] @@ -1663,7 +1663,7 @@ // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_warp_active_thread_mask() +// CHECK1-NEXT: [[TMP1:%.*]] = call i64 @__kmpc_warp_active_thread_mask() // 
CHECK1-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK1-NEXT: store i32 0, i32* [[CRITICAL_COUNTER]], align 4 @@ -1686,7 +1686,7 @@ // CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], [8 x i32]* @"_gomp_critical_user_$var") // CHECK1-NEXT: br label [[OMP_CRITICAL_SYNC]] // CHECK1: omp.critical.sync: -// CHECK1-NEXT: call void @__kmpc_syncwarp(i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_syncwarp(i64 [[TMP1]]) // CHECK1-NEXT: [[TMP9:%.*]] = add nsw i32 [[TMP4]], 1 // CHECK1-NEXT: store i32 [[TMP9]], i32* [[CRITICAL_COUNTER]], align 4 // CHECK1-NEXT: br label [[OMP_CRITICAL_LOOP]] @@ -1935,7 +1935,7 @@ // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_warp_active_thread_mask() +// CHECK2-NEXT: [[TMP1:%.*]] = call i64 @__kmpc_warp_active_thread_mask() // CHECK2-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK2-NEXT: store i32 0, i32* [[CRITICAL_COUNTER]], align 4 @@ -1958,7 +1958,7 @@ // CHECK2-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], [8 x i32]* @"_gomp_critical_user_$var") // CHECK2-NEXT: br label [[OMP_CRITICAL_SYNC]] // CHECK2: omp.critical.sync: -// CHECK2-NEXT: call void @__kmpc_syncwarp(i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_syncwarp(i64 [[TMP1]]) // CHECK2-NEXT: [[TMP9:%.*]] = add nsw i32 [[TMP4]], 1 // CHECK2-NEXT: store i32 [[TMP9]], i32* [[CRITICAL_COUNTER]], align 4 // CHECK2-NEXT: br label [[OMP_CRITICAL_LOOP]] diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h --- 
a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -654,9 +654,6 @@ omp::IdentFlag Flags = omp::IdentFlag(0), unsigned Reserve2Flags = 0); - // Get the type corresponding to __kmpc_impl_lanemask_t from the deviceRTL - Type *getLanemaskType(); - /// Generate control flow and cleanup for cancellation. /// /// \param CancelFlag Flag indicating if the cancellation is performed. diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -39,7 +39,6 @@ __OMP_TYPE(Int64Ptr) OMP_TYPE(SizeTy, M.getDataLayout().getIntPtrType(Ctx)) -OMP_TYPE(LanemaskTy, getLanemaskType()) #define __OMP_PTR_TYPE(NAME, BASE) OMP_TYPE(NAME, BASE->getPointerTo()) @@ -443,8 +442,8 @@ __OMP_RTL(__kmpc_is_spmd_exec_mode, false, Int8, ) __OMP_RTL(__kmpc_barrier_simple_spmd, false, Void, IdentPtr, Int32) -__OMP_RTL(__kmpc_warp_active_thread_mask, false, LanemaskTy,) -__OMP_RTL(__kmpc_syncwarp, false, Void, LanemaskTy) +__OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) +__OMP_RTL(__kmpc_syncwarp, false, Void, Int64) __OMP_RTL(__kmpc_is_generic_main_thread_id, false, Int8, Int32) diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -261,14 +261,6 @@ return Builder.CreatePointerCast(Ident, IdentPtr); } -Type *OpenMPIRBuilder::getLanemaskType() { - LLVMContext &Ctx = M.getContext(); - Triple triple(M.getTargetTriple()); - - // This test is adequate until deviceRTL has finer grained lane widths - return triple.isAMDGCN() ? 
Type::getInt64Ty(Ctx) : Type::getInt32Ty(Ctx); -} - Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr) { Constant *&SrcLocStr = SrcLocStrMap[LocStr]; if (!SrcLocStr) { diff --git a/llvm/test/Transforms/OpenMP/add_attributes.ll b/llvm/test/Transforms/OpenMP/add_attributes.ll --- a/llvm/test/Transforms/OpenMP/add_attributes.ll +++ b/llvm/test/Transforms/OpenMP/add_attributes.ll @@ -626,9 +626,9 @@ declare void @__kmpc_push_target_tripcount_mapper(%struct.ident_t*, i64, i64) -declare i32 @__kmpc_warp_active_thread_mask() +declare i64 @__kmpc_warp_active_thread_mask() -declare void @__kmpc_syncwarp(i32) +declare void @__kmpc_syncwarp(i64) declare i32 @__tgt_target_mapper(%struct.ident_t*, i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i8**) @@ -1149,10 +1149,10 @@ ; CHECK-NEXT: declare void @__kmpc_push_target_tripcount_mapper(%struct.ident_t*, i64, i64) ; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare i32 @__kmpc_warp_active_thread_mask() +; CHECK-NEXT: declare i64 @__kmpc_warp_active_thread_mask() ; CHECK: ; Function Attrs: convergent nounwind -; CHECK-NEXT: declare void @__kmpc_syncwarp(i32) +; CHECK-NEXT: declare void @__kmpc_syncwarp(i64) ; CHECK: ; Function Attrs: nounwind ; CHECK-NEXT: declare i32 @__tgt_target_mapper(%struct.ident_t*, i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i8**) @@ -1677,10 +1677,10 @@ ; OPTIMISTIC-NEXT: declare void @__kmpc_push_target_tripcount_mapper(%struct.ident_t*, i64, i64) ; OPTIMISTIC: ; Function Attrs: convergent nounwind -; OPTIMISTIC-NEXT: declare i32 @__kmpc_warp_active_thread_mask() +; OPTIMISTIC-NEXT: declare i64 @__kmpc_warp_active_thread_mask() ; OPTIMISTIC: ; Function Attrs: convergent nounwind -; OPTIMISTIC-NEXT: declare void @__kmpc_syncwarp(i32) +; OPTIMISTIC-NEXT: declare void @__kmpc_syncwarp(i64) ; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare i32 @__tgt_target_mapper(%struct.ident_t*, i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i8**) diff --git 
a/openmp/libomptarget/DeviceRTL/include/Interface.h b/openmp/libomptarget/DeviceRTL/include/Interface.h --- a/openmp/libomptarget/DeviceRTL/include/Interface.h +++ b/openmp/libomptarget/DeviceRTL/include/Interface.h @@ -247,9 +247,9 @@ void __kmpc_flush(IdentTy *Loc); -__kmpc_impl_lanemask_t __kmpc_warp_active_thread_mask(); +uint64_t __kmpc_warp_active_thread_mask(void); -void __kmpc_syncwarp(__kmpc_impl_lanemask_t Mask); +void __kmpc_syncwarp(uint64_t Mask); void __kmpc_critical(IdentTy *Loc, int32_t TId, CriticalNameTy *Name); diff --git a/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp b/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp --- a/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp @@ -286,11 +286,9 @@ void __kmpc_flush(IdentTy *Loc) { fence::kernel(__ATOMIC_SEQ_CST); } -__kmpc_impl_lanemask_t __kmpc_warp_active_thread_mask() { - return mapping::activemask(); -} +uint64_t __kmpc_warp_active_thread_mask(void) { return mapping::activemask(); } -void __kmpc_syncwarp(__kmpc_impl_lanemask_t Mask) { synchronize::warp(Mask); } +void __kmpc_syncwarp(uint64_t Mask) { synchronize::warp(Mask); } void __kmpc_critical(IdentTy *Loc, int32_t TId, CriticalNameTy *Name) { omp_set_lock(reinterpret_cast<omp_lock_t *>(Name)); diff --git a/openmp/libomptarget/deviceRTLs/common/src/sync.cu b/openmp/libomptarget/deviceRTLs/common/src/sync.cu --- a/openmp/libomptarget/deviceRTLs/common/src/sync.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/sync.cu @@ -123,7 +123,7 @@ // Vote //////////////////////////////////////////////////////////////////////////////// -EXTERN __kmpc_impl_lanemask_t __kmpc_warp_active_thread_mask() { +EXTERN uint64_t __kmpc_warp_active_thread_mask(void) { PRINT0(LD_IO, "call __kmpc_warp_active_thread_mask\n"); return __kmpc_impl_activemask(); } @@ -132,7 +132,7 @@ // Syncwarp //////////////////////////////////////////////////////////////////////////////// -EXTERN void 
__kmpc_syncwarp(__kmpc_impl_lanemask_t Mask) { +EXTERN void __kmpc_syncwarp(uint64_t Mask) { PRINT0(LD_IO, "call __kmpc_syncwarp\n"); __kmpc_impl_syncwarp(Mask); } diff --git a/openmp/libomptarget/deviceRTLs/interface.h b/openmp/libomptarget/deviceRTLs/interface.h --- a/openmp/libomptarget/deviceRTLs/interface.h +++ b/openmp/libomptarget/deviceRTLs/interface.h @@ -375,9 +375,9 @@ EXTERN void __kmpc_flush(kmp_Ident *loc); // vote -EXTERN __kmpc_impl_lanemask_t __kmpc_warp_active_thread_mask(); +EXTERN uint64_t __kmpc_warp_active_thread_mask(void); // syncwarp -EXTERN void __kmpc_syncwarp(__kmpc_impl_lanemask_t); +EXTERN void __kmpc_syncwarp(uint64_t); // tasks EXTERN kmp_TaskDescr *__kmpc_omp_task_alloc(kmp_Ident *loc, uint32_t global_tid,