diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -226,6 +226,9 @@ omp::IdentFlag Flags = omp::IdentFlag(0), unsigned Reserve2Flags = 0); + /// Get the type corresponding to __kmpc_impl_lanemask_t from the deviceRTL. + Type *getLanemaskType(); + /// Generate control flow and cleanup for cancellation. /// /// \param CancelFlag Flag indicating if the cancellation is performed. diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -153,6 +153,7 @@ __OMP_TYPE(Int64Ptr) OMP_TYPE(SizeTy, M.getDataLayout().getIntPtrType(Ctx)) +OMP_TYPE(LanemaskTy, getLanemaskType()) #define __OMP_PTR_TYPE(NAME, BASE) OMP_TYPE(NAME, BASE->getPointerTo()) @@ -553,8 +554,9 @@ Int16, VoidPtrPtr) __OMP_RTL(__kmpc_restore_team_static_memory, false, Void, Int16, Int16) __OMP_RTL(__kmpc_barrier_simple_spmd, false, Void, IdentPtr, Int32) -__OMP_RTL(__kmpc_warp_active_thread_mask, false, Int32, ) -__OMP_RTL(__kmpc_syncwarp, false, Void, Int32) + +__OMP_RTL(__kmpc_warp_active_thread_mask, false, LanemaskTy, ) +__OMP_RTL(__kmpc_syncwarp, false, Void, LanemaskTy) __OMP_RTL(__last, false, Void, ) diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" #include "llvm/IR/CFG.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/IRBuilder.h" @@ -217,6 +218,14 @@ return Ident; } +Type *OpenMPIRBuilder::getLanemaskType() { + LLVMContext &Ctx = M.getContext(); + Triple T(M.getTargetTriple()); + + // This test is adequate 
until deviceRTL has finer grained lane widths + return T.isAMDGCN() ? Type::getInt64Ty(Ctx) : Type::getInt32Ty(Ctx); +} + Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr) { Constant *&SrcLocStr = SrcLocStrMap[LocStr]; if (!SrcLocStr) { diff --git a/llvm/test/Transforms/OpenMP/add_attributes.ll b/llvm/test/Transforms/OpenMP/add_attributes.ll --- a/llvm/test/Transforms/OpenMP/add_attributes.ll +++ b/llvm/test/Transforms/OpenMP/add_attributes.ll @@ -629,6 +629,10 @@ declare void @__kmpc_push_target_tripcount(i64, i64) +declare i32 @__kmpc_warp_active_thread_mask() + +declare void @__kmpc_syncwarp(i32) + declare i32 @__tgt_target_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) declare i32 @__tgt_target_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) @@ -1142,6 +1146,12 @@ ; CHECK: ; Function Attrs: nounwind ; CHECK-NEXT: declare void @__kmpc_push_target_tripcount(i64, i64) +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare i32 @__kmpc_warp_active_thread_mask() + +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare void @__kmpc_syncwarp(i32) + ; CHECK: ; Function Attrs: nounwind ; CHECK-NEXT: declare i32 @__tgt_target_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) @@ -1661,6 +1671,12 @@ ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly ; OPTIMISTIC-NEXT: declare void @__kmpc_push_target_tripcount(i64, i64) +; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC-NEXT: declare i32 @__kmpc_warp_active_thread_mask() + +; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC-NEXT: declare void @__kmpc_syncwarp(i32) + ; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC-NEXT: declare i32 @__tgt_target_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) diff --git a/llvm/test/Transforms/OpenMP/add_attributes_amdgcn.ll b/llvm/test/Transforms/OpenMP/add_attributes_amdgcn.ll new file mode 100644 --- /dev/null +++ 
b/llvm/test/Transforms/OpenMP/add_attributes_amdgcn.ll @@ -0,0 +1,28 @@ +; RUN: opt < %s -S -openmpopt | FileCheck %s +; RUN: opt < %s -S -passes=openmpopt | FileCheck %s +; RUN: opt < %s -S -openmpopt -openmp-ir-builder-optimistic-attributes | FileCheck %s --check-prefix=OPTIMISTIC +; RUN: opt < %s -S -passes=openmpopt -openmp-ir-builder-optimistic-attributes | FileCheck %s --check-prefix=OPTIMISTIC + +target triple = "amdgcn-amd-amdhsa" + +define void @call_all(i64 %arg) { + call void @__kmpc_syncwarp(i64 %arg) + call i64 @__kmpc_warp_active_thread_mask() + ret void +} + +declare i64 @__kmpc_warp_active_thread_mask() + +declare void @__kmpc_syncwarp(i64) + +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare i64 @__kmpc_warp_active_thread_mask() + +; CHECK: ; Function Attrs: convergent nounwind +; CHECK-NEXT: declare void @__kmpc_syncwarp(i64) + +; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC-NEXT: declare i64 @__kmpc_warp_active_thread_mask() + +; OPTIMISTIC: ; Function Attrs: convergent nounwind +; OPTIMISTIC-NEXT: declare void @__kmpc_syncwarp(i64)