diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -381,7 +381,7 @@ cast(*S)); break; case Stmt::OMPInteropDirectiveClass: - llvm_unreachable("Interop directive not supported yet."); + EmitOMPInteropDirective(cast(*S)); break; case Stmt::OMPDispatchDirectiveClass: llvm_unreachable("Dispatch directive not supported yet."); diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -6300,6 +6300,60 @@ [](CodeGenFunction &) { return nullptr; }); } +void CodeGenFunction::EmitOMPInteropDirective(const OMPInteropDirective &S) { + llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); + llvm::Value *Device = nullptr; + if (const auto *C = S.getSingleClause()) + Device = EmitScalarExpr(C->getDevice()); + + llvm::Value *NumDependences = nullptr; + llvm::Value *DependenceAddress = nullptr; + if (const auto *DC = S.getSingleClause()) { + OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(), + DC->getModifier()); + Dependencies.DepExprs.append(DC->varlist_begin(), DC->varlist_end()); + std::pair DependencePair = + CGM.getOpenMPRuntime().emitDependClause(*this, Dependencies, + DC->getBeginLoc()); + NumDependences = DependencePair.first; + DependenceAddress = Builder.CreatePointerCast( + DependencePair.second.getPointer(), CGM.Int8PtrTy); + } + + assert(!(S.hasClausesOfKind() && + !(S.getSingleClause() || + S.getSingleClause() || + S.getSingleClause())) && + "OMPNowaitClause clause is used separately in OMPInteropDirective."); + + if (const auto *C = S.getSingleClause()) { + llvm::Value *InteropvarPtr = + EmitLValue(C->getInteropVar()).getPointer(*this); + llvm::omp::OMPInteropType InteropType = llvm::omp::OMPInteropType::Unknown; + if (C->getIsTarget()) { + InteropType = llvm::omp::OMPInteropType::Target; + } else { + assert(C->getIsTargetSync() && "Expected interop-type target/targetsync"); + InteropType = llvm::omp::OMPInteropType::TargetSync; + } + OMPBuilder.createOMPInteropInit(Builder, InteropvarPtr, InteropType, Device, + NumDependences, DependenceAddress, + S.hasClausesOfKind()); + } else if (const auto *C = S.getSingleClause()) { + llvm::Value *InteropvarPtr = + EmitLValue(C->getInteropVar()).getPointer(*this); + OMPBuilder.createOMPInteropDestroy(Builder, InteropvarPtr, Device, + NumDependences, DependenceAddress, + S.hasClausesOfKind()); + } else if (const auto *C = S.getSingleClause()) { + llvm::Value *InteropvarPtr = + EmitLValue(C->getInteropVar()).getPointer(*this); + OMPBuilder.createOMPInteropUse(Builder, InteropvarPtr, Device, + NumDependences, DependenceAddress, + S.hasClausesOfKind()); + } +} + static void emitTargetTeamsDistributeParallelForRegion( CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S, PrePostActionTy &Action) { diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -3511,6 +3511,7 @@ const OMPTargetTeamsDistributeParallelForSimdDirective &S); void EmitOMPTargetTeamsDistributeSimdDirective( const OMPTargetTeamsDistributeSimdDirective &S); + void EmitOMPInteropDirective(const OMPInteropDirective &S); /// Emit device code for the target directive. static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM, diff --git a/clang/test/OpenMP/interop_irbuilder.cpp b/clang/test/OpenMP/interop_irbuilder.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/interop_irbuilder.cpp @@ -0,0 +1,96 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs +// RUN: %clang_cc1 -verify -triple x86_64-unknown-linux -fopenmp -x c++ -emit-llvm %s -o - | FileCheck %s + +// expected-no-diagnostics +typedef void *omp_interop_t; + +void test1() { + + int device_id = 4; + int D0, D1; + omp_interop_t interop; + +#pragma omp interop init(target \ + : interop) + +#pragma omp interop init(targetsync \ + : interop) + +#pragma omp interop init(target \ + : interop) device(device_id) + +#pragma omp interop init(targetsync \ + : interop) device(device_id) + +#pragma omp interop use(interop) depend(in \ + : D0, D1) nowait + +#pragma omp interop destroy(interop) depend(in \ + : D0, D1) +} + +// CHECK-LABEL: @_Z5test1v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DEVICE_ID:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[D0:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[D1:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[INTEROP:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[DOTDEP_ARR_ADDR:%.*]] = alloca [2 x %struct.kmp_depend_info], align 8 +// CHECK-NEXT: [[DEP_COUNTER_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[DOTDEP_ARR_ADDR5:%.*]] = alloca [2 x %struct.kmp_depend_info], align 8 +// CHECK-NEXT: [[DEP_COUNTER_ADDR6:%.*]] = alloca i64, align 8 +// CHECK-NEXT: store i32 4, i32* [[DEVICE_ID]], align 4 +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK-NEXT: call void @__tgt_interop_init(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i8** [[INTEROP]], i64 1, i32 -1, i32 0, i8* null, i32 0) +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-NEXT: call void @__tgt_interop_init(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i8** [[INTEROP]], i64 2, i32 -1, i32 0, i8* null, i32 0) +// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[DEVICE_ID]], align 4 +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-NEXT: call void @__tgt_interop_init(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]], i8** [[INTEROP]], i64 1, i32 [[TMP0]], i32 0, i8* null, i32 0) +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DEVICE_ID]], align 4 +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-NEXT: call void @__tgt_interop_init(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i8** [[INTEROP]], i64 2, i32 [[TMP1]], i32 0, i8* null, i32 0) +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], [2 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR]], i64 0, i64 0 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO:%.*]], %struct.kmp_depend_info* [[TMP2]], i64 0 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP3]], i32 0, i32 0 +// CHECK-NEXT: [[TMP5:%.*]] = ptrtoint i32* [[D0]] to i64 +// CHECK-NEXT: store i64 [[TMP5]], i64* [[TMP4]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP3]], i32 0, i32 1 +// CHECK-NEXT: store i64 4, i64* [[TMP6]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP3]], i32 0, i32 2 +// CHECK-NEXT: store i8 1, i8* [[TMP7]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP2]], i64 1 +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP8]], i32 0, i32 0 +// CHECK-NEXT: [[TMP10:%.*]] = ptrtoint i32* [[D1]] to i64 +// CHECK-NEXT: store i64 [[TMP10]], i64* [[TMP9]], align 8 +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP8]], i32 0, i32 1 +// CHECK-NEXT: store i64 4, i64* [[TMP11]], align 8 +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP8]], i32 0, i32 2 +// CHECK-NEXT: store i8 1, i8* [[TMP12]], align 8 +// CHECK-NEXT: store i64 2, i64* [[DEP_COUNTER_ADDR]], align 8 +// CHECK-NEXT: [[TMP13:%.*]] = bitcast %struct.kmp_depend_info* [[TMP2]] to i8* +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-NEXT: call void @__tgt_interop_use(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM4]], i8** [[INTEROP]], i32 -1, i32 2, i8* [[TMP13]], i32 1) +// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], [2 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR5]], i64 0, i64 0 +// CHECK-NEXT: [[TMP15:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP14]], i64 0 +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP15]], i32 0, i32 0 +// CHECK-NEXT: [[TMP17:%.*]] = ptrtoint i32* [[D0]] to i64 +// CHECK-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8 +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP15]], i32 0, i32 1 +// CHECK-NEXT: store i64 4, i64* [[TMP18]], align 8 +// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP15]], i32 0, i32 2 +// CHECK-NEXT: store i8 1, i8* [[TMP19]], align 8 +// CHECK-NEXT: [[TMP20:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP14]], i64 1 +// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP20]], i32 0, i32 0 +// CHECK-NEXT: [[TMP22:%.*]] = ptrtoint i32* [[D1]] to i64 +// CHECK-NEXT: store i64 [[TMP22]], i64* [[TMP21]], align 8 +// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP20]], i32 0, i32 1 +// CHECK-NEXT: store i64 4, i64* [[TMP23]], align 8 +// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP20]], i32 0, i32 2 +// CHECK-NEXT: store i8 1, i8* [[TMP24]], align 8 +// CHECK-NEXT: store i64 2, i64* [[DEP_COUNTER_ADDR6]], align 8 +// CHECK-NEXT: [[TMP25:%.*]] = bitcast %struct.kmp_depend_info* [[TMP14]] to i8* +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-NEXT: call void @__tgt_interop_destroy(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM7]], i8** [[INTEROP]], i32 -1, i32 2, i8* [[TMP25]], i32 0) +// CHECK-NEXT: ret void +// diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h --- a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h @@ -128,6 +128,9 @@ LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue */ ModifierMask) }; +/// \note This needs to be kept in sync with interop.h enum kmp_interop_type_t.: +enum class OMPInteropType { Unknown, Target, TargetSync }; + } // end namespace omp } // end namespace llvm diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -779,6 +779,56 @@ llvm::ConstantInt *Size, const llvm::Twine &Name = Twine("")); + /// Create a runtime call for kmpc_interop_init + /// + /// \param Loc The insert and source location description. + /// \param InteropVar variable to be allocated + /// \param InteropType type of interop operation + /// \param Device devide to which offloading will occur + /// \param NumDependences number of dependence variables + /// \param DependenceAddress pointer to dependence variables + /// \param HaveNowaitClause does nowait clause exist + /// + /// \returns CallInst to the kmpc_interop_init call + CallInst *createOMPInteropInit(const LocationDescription &Loc, + Value *InteropVar, + omp::OMPInteropType InteropType, Value *Device, + Value *NumDependences, + Value *DependenceAddress, + bool HaveNowaitClause); + + /// Create a runtime call for kmpc_interop_destroy + /// + /// \param Loc The insert and source location description. + /// \param InteropVar variable to be allocated + /// \param Device devide to which offloading will occur + /// \param NumDependences number of dependence variables + /// \param DependenceAddress pointer to dependence variables + /// \param HaveNowaitClause does nowait clause exist + /// + /// \returns CallInst to the kmpc_interop_destroy call + CallInst *createOMPInteropDestroy(const LocationDescription &Loc, + Value *InteropVar, Value *Device, + Value *NumDependences, + Value *DependenceAddress, + bool HaveNowaitClause); + + /// Create a runtime call for kmpc_interop_use + /// + /// \param Loc The insert and source location description. + /// \param InteropVar variable to be allocated + /// \param Device devide to which offloading will occur + /// \param NumDependences number of dependence variables + /// \param DependenceAddress pointer to dependence variables + /// \param HaveNowaitClause does nowait clause exist + /// + /// \returns CallInst to the kmpc_interop_use call + CallInst *createOMPInteropUse(const LocationDescription &Loc, + Value *InteropVar, Value *Device, + Value *NumDependences, Value *DependenceAddress, + bool HaveNowaitClause); + + /// Declarations for LLVM-IR types (simple, array, function and structure) are /// The `omp target` interface /// /// For more information about the usage of this interface, diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -371,6 +371,15 @@ __OMP_RTL(__kmpc_alloc, false, VoidPtr, /* Int */ Int32, SizeTy, VoidPtr) __OMP_RTL(__kmpc_free, false, Void, /* Int */ Int32, VoidPtr, VoidPtr) +__OMP_RTL(__tgt_interop_init, false, Void, IdentPtr, Int32, VoidPtrPtr, + Int64, Int32, Int32, VoidPtr, Int32) + +__OMP_RTL(__tgt_interop_destroy, false, Void, IdentPtr, Int32, VoidPtrPtr, + Int32, Int32, VoidPtr, Int32) + +__OMP_RTL(__tgt_interop_use, false, Void, IdentPtr, Int32, VoidPtrPtr, + Int32, Int32, VoidPtr, Int32) + __OMP_RTL(__kmpc_init_allocator, false, /* omp_allocator_handle_t */ VoidPtr, /* Int */ Int32, /* omp_memespace_handle_t */ VoidPtr, /* Int */ Int32, /* omp_alloctrait_t */ VoidPtr) diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -2173,6 +2173,90 @@ return Builder.CreateCall(Fn, Args, Name); } +CallInst *OpenMPIRBuilder::createOMPInteropInit( + const LocationDescription &Loc, Value *InteropVar, + omp::OMPInteropType InteropType, Value *Device, Value *NumDependences, + Value *DependenceAddress, bool HaveNowaitClause) { + IRBuilder<>::InsertPointGuard IPG(Builder); + Builder.restoreIP(Loc.IP); + + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *Ident = getOrCreateIdent(SrcLocStr); + Value *ThreadId = getOrCreateThreadID(Ident); + if (Device == NULL) + Device = ConstantInt::get(M.getContext(), APInt(32, -1, true)); + Constant *InteropTypeVal = ConstantInt::get(Int64, (int)InteropType); + if (NumDependences == nullptr) { + NumDependences = ConstantInt::get(Int32, 0); + PointerType *PointerTypeVar = Type::getInt8PtrTy(M.getContext()); + DependenceAddress = ConstantPointerNull::get(PointerTypeVar); + } + Value *HaveNowaitClauseVal = + ConstantInt::get(M.getContext(), APInt(32, HaveNowaitClause, true)); + Value *Args[] = { + Ident, ThreadId, InteropVar, InteropTypeVal, + Device, NumDependences, DependenceAddress, HaveNowaitClauseVal}; + + Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_init); + + return Builder.CreateCall(Fn, Args); +} + +CallInst *OpenMPIRBuilder::createOMPInteropDestroy( + const LocationDescription &Loc, Value *InteropVar, Value *Device, + Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause) { + IRBuilder<>::InsertPointGuard IPG(Builder); + Builder.restoreIP(Loc.IP); + + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *Ident = getOrCreateIdent(SrcLocStr); + Value *ThreadId = getOrCreateThreadID(Ident); + if (Device == NULL) + Device = ConstantInt::get(M.getContext(), APInt(32, -1, true)); + if (NumDependences == nullptr) { + NumDependences = ConstantInt::get(Int32, 0); + PointerType *PointerTypeVar = Type::getInt8PtrTy(M.getContext()); + DependenceAddress = ConstantPointerNull::get(PointerTypeVar); + } + Value *HaveNowaitClauseVal = + ConstantInt::get(M.getContext(), APInt(32, HaveNowaitClause, true)); + Value *Args[] = { + Ident, ThreadId, InteropVar, Device, + NumDependences, DependenceAddress, HaveNowaitClauseVal}; + + Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_destroy); + + return Builder.CreateCall(Fn, Args); +} + +CallInst *OpenMPIRBuilder::createOMPInteropUse(const LocationDescription &Loc, + Value *InteropVar, Value *Device, + Value *NumDependences, + Value *DependenceAddress, + bool HaveNowaitClause) { + IRBuilder<>::InsertPointGuard IPG(Builder); + Builder.restoreIP(Loc.IP); + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *Ident = getOrCreateIdent(SrcLocStr); + Value *ThreadId = getOrCreateThreadID(Ident); + if (Device == NULL) + Device = ConstantInt::get(M.getContext(), APInt(32, -1, true)); + if (NumDependences == nullptr) { + NumDependences = ConstantInt::get(Int32, 0); + PointerType *PointerTypeVar = Type::getInt8PtrTy(M.getContext()); + DependenceAddress = ConstantPointerNull::get(PointerTypeVar); + } + Value *HaveNowaitClauseVal = + ConstantInt::get(M.getContext(), APInt(32, HaveNowaitClause, true)); + Value *Args[] = { + Ident, ThreadId, InteropVar, Device, + NumDependences, DependenceAddress, HaveNowaitClauseVal}; + + Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_use); + + return Builder.CreateCall(Fn, Args); +} + CallInst *OpenMPIRBuilder::createCachedThreadPrivate( const LocationDescription &Loc, llvm::Value *Pointer, llvm::ConstantInt *Size, const llvm::Twine &Name) {