Index: clang/lib/CodeGen/CGOpenMPRuntime.h =================================================================== --- clang/lib/CodeGen/CGOpenMPRuntime.h +++ clang/lib/CodeGen/CGOpenMPRuntime.h @@ -1547,7 +1547,7 @@ /// Emit code for 'taskwait' directive. virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, - const OMPTaskDataTy &Data); + const OMPTaskDataTy &Data, bool NoWait); /// Emit code for 'cancellation point' construct. /// \param CancelRegion Region kind for which the cancellation point must be @@ -2388,7 +2388,7 @@ /// Emit code for 'taskwait' directive. void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, - const OMPTaskDataTy &Data) override; + const OMPTaskDataTy &Data, bool NoWait) override; /// Emit code for 'cancellation point' construct. /// \param CancelRegion Region kind for which the cancellation point must be Index: clang/lib/CodeGen/CGOpenMPRuntime.cpp =================================================================== --- clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -6128,13 +6128,15 @@ } void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, - const OMPTaskDataTy &Data) { + const OMPTaskDataTy &Data, + bool HaveNoWaitClause) { if (!CGF.HaveInsertPoint()) return; if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) { // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder. 
- OMPBuilder.createTaskwait(CGF.Builder); + OMPBuilder.createTaskwait(CGF.Builder, CGM.getLangOpts().OpenMP, + HaveNoWaitClause); } else { llvm::Value *ThreadID = getThreadID(CGF, Loc); llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); @@ -6143,34 +6145,60 @@ llvm::Value *NumOfElements; std::tie(NumOfElements, DependenciesArray) = emitDependClause(CGF, Data.Dependences, Loc); - llvm::Value *DepWaitTaskArgs[6]; if (!Data.Dependences.empty()) { - DepWaitTaskArgs[0] = UpLoc; - DepWaitTaskArgs[1] = ThreadID; - DepWaitTaskArgs[2] = NumOfElements; - DepWaitTaskArgs[3] = DependenciesArray.getPointer(); - DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); - DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); - - CodeGenFunction::RunCleanupsScope LocalScope(CGF); - - // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, - // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 - // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info - // is specified. - CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps), - DepWaitTaskArgs); + if (CGM.getLangOpts().OpenMP >= 51) { + llvm::Value *DepWaitTaskArgs[7]; + DepWaitTaskArgs[0] = UpLoc; + DepWaitTaskArgs[1] = ThreadID; + DepWaitTaskArgs[2] = NumOfElements; + DepWaitTaskArgs[3] = DependenciesArray.getPointer(); + DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); + DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); + DepWaitTaskArgs[6] = llvm::ConstantInt::get(CGF.IntTy, + HaveNoWaitClause); + + CodeGenFunction::RunCleanupsScope LocalScope(CGF); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + M, OMPRTL___kmpc_omp_taskwait_deps_51), + DepWaitTaskArgs); + } else { + // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, + // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 + // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence + // info is specified. 
+ llvm::Value *DepWaitTaskArgs[6]; + DepWaitTaskArgs[0] = UpLoc; + DepWaitTaskArgs[1] = ThreadID; + DepWaitTaskArgs[2] = NumOfElements; + DepWaitTaskArgs[3] = DependenciesArray.getPointer(); + DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); + DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); + + CodeGenFunction::RunCleanupsScope LocalScope(CGF); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + M, OMPRTL___kmpc_omp_wait_deps), + DepWaitTaskArgs); + } } else { // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 // global_tid); - llvm::Value *Args[] = {UpLoc, ThreadID}; - // Ignore return result until untied tasks are supported. - CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait), - Args); + llvm::Value *HaveNowaitClauseVal = + llvm::ConstantInt::get(CGF.IntTy, HaveNoWaitClause); + if (CGM.getLangOpts().OpenMP >= 51) { + llvm::Value *Args[3] = {UpLoc, ThreadID, HaveNowaitClauseVal}; + // Ignore return result until untied tasks are supported. + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + M, OMPRTL___kmpc_omp_taskwait_51), + Args); + } else { + llvm::Value *Args[2] = {UpLoc, ThreadID}; + // Ignore return result until untied tasks are supported. 
+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + M, OMPRTL___kmpc_omp_taskwait), + Args); + } } } @@ -12976,7 +13004,8 @@ void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, - const OMPTaskDataTy &Data) { + const OMPTaskDataTy &Data, + bool NoWait) { llvm_unreachable("Not supported in SIMD-only mode"); } Index: clang/lib/CodeGen/CGStmtOpenMP.cpp =================================================================== --- clang/lib/CodeGen/CGStmtOpenMP.cpp +++ clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -5204,7 +5204,12 @@ OMPTaskDataTy Data; // Build list of dependences buildDependences(S, Data); - CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc(), Data); + if (S.hasClausesOfKind<OMPNowaitClause>()) { + CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc(), Data, true); + } else { + CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc(), Data, + false); + } } bool isSupportedByOpenMPIRBuilder(const OMPTaskgroupDirective &T) { Index: clang/test/OpenMP/task_codegen.cpp =================================================================== --- clang/test/OpenMP/task_codegen.cpp +++ clang/test/OpenMP/task_codegen.cpp @@ -3621,7 +3621,7 @@ // CHECK2-51-NEXT: [[A9_I:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[TMP19]], i32 0, i32 0 // CHECK2-51-NEXT: store i32 10, i32* [[A9_I]], align 4 // CHECK2-51-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !122 -// CHECK2-51-NEXT: [[TMP53:%.*]] = call i32 @__kmpc_omp_taskwait(%struct.ident_t* @[[GLOB1]], i32 [[TMP52]]) +// CHECK2-51-NEXT: [[TMP53:%.*]] = call i32 @__kmpc_omp_taskwait_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP52]], i32 0) // CHECK2-51-NEXT: [[TMP54:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !122 // CHECK2-51-NEXT: store i32 5, i32* [[TMP54]], align 4 // CHECK2-51-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !122 @@ -7506,7 +7506,7 @@ // CHECK3-51-NEXT:
[[A12_I:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S2_I]], i32 0, i32 0 // CHECK3-51-NEXT: store i32 10, i32* [[A12_I]], align 4, !noalias !122 // CHECK3-51-NEXT: [[OMP_GLOBAL_THREAD_NUM13_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-51-NEXT: [[TMP40:%.*]] = call i32 @__kmpc_omp_taskwait(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM13_I]]) +// CHECK3-51-NEXT: [[TMP40:%.*]] = call i32 @__kmpc_omp_taskwait_51(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM13_I]], i32 0) // CHECK3-51-NEXT: [[TMP41:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !122 // CHECK3-51-NEXT: store i32 4, i32* [[TMP41]], align 4 // CHECK3-51-NEXT: [[OMP_GLOBAL_THREAD_NUM14_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23]]) @@ -9033,7 +9033,7 @@ // CHECK4-51-NEXT: [[A12_I:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S2_I]], i32 0, i32 0 // CHECK4-51-NEXT: store i32 10, i32* [[A12_I]], align 4, !noalias !112 // CHECK4-51-NEXT: [[OMP_GLOBAL_THREAD_NUM13_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-51-NEXT: [[TMP40:%.*]] = call i32 @__kmpc_omp_taskwait(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM13_I]]) +// CHECK4-51-NEXT: [[TMP40:%.*]] = call i32 @__kmpc_omp_taskwait_51(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM13_I]], i32 0) // CHECK4-51-NEXT: [[TMP41:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !112 // CHECK4-51-NEXT: store i32 4, i32* [[TMP41]], align 4 // CHECK4-51-NEXT: [[OMP_GLOBAL_THREAD_NUM14_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) Index: clang/test/OpenMP/taskwait_ast_print.cpp =================================================================== --- clang/test/OpenMP/taskwait_ast_print.cpp +++ clang/test/OpenMP/taskwait_ast_print.cpp @@ -1,10 +1,10 @@ -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -ast-print %s | FileCheck %s 
-// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=51 -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=51 -x c++ -std=c++11 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=51 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -ast-print %s | FileCheck %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=51 -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=51 -x c++ -std=c++11 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=51 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s // expected-no-diagnostics #ifndef HEADER @@ -19,6 +19,23 @@ #pragma omp taskwait depend(in:a, argc) return a + argc; } + +template +T nmain(T argc) { + static T a; +#pragma omp taskwait +#pragma omp taskwait nowait + return a + argc; +} + +template +T ndmain(T argc) { + static T a; +#pragma omp taskwait +#pragma omp taskwait depend(in:a, argc) nowait + return a + argc; +} + // CHECK: static T a; // CHECK-NEXT: #pragma omp taskwait{{$}} // CHECK-NEXT: #pragma omp taskwait depend(in : a,argc){{$}} @@ -29,6 +46,26 @@ // CHECK-NEXT: #pragma omp taskwait // CHECK-NEXT: #pragma omp taskwait depend(in : a,argc){{$}} +// CHECK: static T a; +// CHECK-NEXT: #pragma omp taskwait{{$}} +// CHECK-NEXT: #pragma omp taskwait nowait{{$}} +// CHECK: static int a; +// CHECK-NEXT: #pragma omp taskwait +// CHECK-NEXT: #pragma omp taskwait nowait{{$}} +// CHECK: 
static char a; +// CHECK-NEXT: #pragma omp taskwait +// CHECK-NEXT: #pragma omp taskwait nowait{{$}} + +// CHECK: static T a; +// CHECK-NEXT: #pragma omp taskwait{{$}} +// CHECK-NEXT: #pragma omp taskwait depend(in : a,argc) nowait{{$}} +// CHECK: static int a; +// CHECK-NEXT: #pragma omp taskwait +// CHECK-NEXT: #pragma omp taskwait depend(in : a,argc) nowait{{$}} +// CHECK: static char a; +// CHECK-NEXT: #pragma omp taskwait +// CHECK-NEXT: #pragma omp taskwait depend(in : a,argc) nowait{{$}} + int main(int argc, char **argv) { static int a; // CHECK: static int a; @@ -36,7 +73,7 @@ #pragma omp taskwait depend(out:a, argc) // CHECK-NEXT: #pragma omp taskwait // CHECK-NEXT: #pragma omp taskwait depend(out : a,argc) - return tmain(argc) + tmain(argv[0][0]) + a; + return tmain(argc) + tmain(argv[0][0]) + a + nmain(argc) + nmain(argv[0][0]) + ndmain(argc) + ndmain(argv[0][0]); } #endif Index: clang/test/OpenMP/taskwait_depend_nowait_codegen.cpp =================================================================== --- /dev/null +++ clang/test/OpenMP/taskwait_depend_nowait_codegen.cpp @@ -0,0 +1,50 @@ +// RUN: %clang_cc1 -no-opaque-pointers -verify -triple x86_64-apple-darwin10 -fopenmp -fopenmp-version=51 -x c++ -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -fopenmp-version=51 -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -fopenmp-version=51 -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// RUN: %clang_cc1 -no-opaque-pointers -verify -triple x86_64-apple-darwin10 -fopenmp-simd -fopenmp-version=51 -x c++ -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -fopenmp-version=51 -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -fopenmp-version=51 -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s 
-emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +void foo() {} + +template +T tmain(T &argc) { + static T a; + #pragma omp taskwait depend(in:argc) + return a + argc; +} + +template +T nmain(T &argc) { + static T a; + #pragma omp taskwait depend(in:argc) nowait + return a + argc; +} + +int main(int argc, char **argv) { + int n = 0; + #pragma omp task shared(n,argc) depend(out:n) + n = argc; + return tmain(n) + nmain(n); +} + +// CHECK-LABEL: @main +// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%{{.+}}* @{{.+}}) +// CHECK: [[ALLOC:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{.+}}* @{{.+}}, i32 [[GTID]], i32 1, i64 40, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* @{{.+}} to i32 (i32, i8*)*)) +// CHECK: %{{.+}} = call i32 @__kmpc_omp_task_with_deps(%{{.+}}* @{{.+}}, i32 [[GTID]], i8* [[ALLOC]], i32 1, i8* %{{[0-9]*}}, i32 0, i8* null) + +// CHECK-LABEL: define {{.*}} @{{.*}}tmain{{.*}} +// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%{{.+}}* @{{.+}}) +// CHECK: call void @__kmpc_omp_taskwait_deps_51(%{{.+}}* @{{.+}}, i32 [[GTID]], i32 1, i8* %{{.}}, i32 0, i8* null, i32 0) + +// CHECK-LABEL: define {{.*}} @{{.*}}nmain{{.*}} +// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%{{.+}}* @{{.+}}) +// CHECK: call void @__kmpc_omp_taskwait_deps_51(%{{.+}}* @{{.+}}, i32 [[GTID]], i32 1, i8* %{{.}}, i32 0, i8* null, i32 1) + + +#endif Index: clang/test/OpenMP/taskwait_nowait_codegen.cpp =================================================================== --- /dev/null +++ clang/test/OpenMP/taskwait_nowait_codegen.cpp @@ -0,0 +1,51 @@ +// RUN: %clang_cc1 -no-opaque-pointers -verify -triple x86_64-apple-darwin10 -fopenmp -fopenmp-version=51 -x c++ -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -fopenmp-version=51 -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 
-no-opaque-pointers -fopenmp -fopenmp-version=51 -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// +// RUN: %clang_cc1 -no-opaque-pointers -verify -triple x86_64-apple-darwin10 -fopenmp -fopenmp-version=51 -fopenmp-enable-irbuilder -x c++ -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -fopenmp-version=51 -fopenmp-enable-irbuilder -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -fopenmp-version=51 -fopenmp-enable-irbuilder -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// RUN: %clang_cc1 -no-opaque-pointers -verify -triple x86_64-apple-darwin10 -fopenmp-simd -fopenmp-version=51 -x c++ -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -fopenmp-version=51 -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -fopenmp-version=51 -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +void foo() {} + +template +T tmain(T argc) { + static T a; +#pragma omp taskwait + return a + argc; +} + +template +T no_wait(T argc) { + static T a; +#pragma omp taskwait nowait + return a + argc; +} + +int main(int argc, char **argv) { +#pragma omp taskwait + return tmain(argc)+no_wait(argc); +} + +// CHECK-LABEL: @main +// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%{{.+}}* @{{.+}}) +// CHECK: call i32 @__kmpc_omp_taskwait_51(%{{.+}}* @{{.+}}, i32 [[GTID]], i32 0) + +// CHECK-LABEL: define {{.*}} @{{.*}}tmain{{.*}} +// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%{{.+}}* @{{.+}}) +// CHECK: call i32 @__kmpc_omp_taskwait_51(%{{.+}}* @{{.+}}, i32 [[GTID]], i32 0) +// +// CHECK-LABEL: define 
{{.*}} @{{.*}}no_wait{{.*}} +// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%{{.+}}* @{{.+}}) +// CHECK: call i32 @__kmpc_omp_taskwait_51(%{{.+}}* @{{.+}}, i32 [[GTID]], i32 1) + + +#endif Index: llvm/include/llvm/Frontend/OpenMP/OMP.td =================================================================== --- llvm/include/llvm/Frontend/OpenMP/OMP.td +++ llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -528,7 +528,8 @@ def OMP_Barrier : Directive<"barrier"> {} def OMP_TaskWait : Directive<"taskwait"> { let allowedClauses = [ - VersionedClause<OMPC_Depend, 50> + VersionedClause<OMPC_Depend, 50>, + VersionedClause<OMPC_Nowait, 51> ]; } def OMP_TaskGroup : Directive<"taskgroup"> { Index: llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h =================================================================== --- llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -630,7 +630,8 @@ /// Generator for '#omp taskwait' /// /// \param Loc The location where the taskwait directive was encountered. - void createTaskwait(const LocationDescription &Loc); + void createTaskwait(const LocationDescription &Loc, int OpenMPVer = 0, + bool HaveNoTaskWaitClause = false); /// Generator for '#omp taskyield' /// @@ -900,7 +901,8 @@ /// Generate a taskwait runtime call. /// /// \param Loc The location at which the request originated and is fulfilled. - void emitTaskwaitImpl(const LocationDescription &Loc); + void emitTaskwaitImpl(const LocationDescription &Loc, int OpenMPVer, + bool HaveNoWaitClause); /// Generate a taskyield runtime call.
/// Index: llvm/include/llvm/Frontend/OpenMP/OMPKinds.def =================================================================== --- llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -202,6 +202,7 @@ __OMP_RTL(__kmpc_get_hardware_thread_id_in_block, false, Int32, ) __OMP_RTL(__kmpc_fork_call, true, Void, IdentPtr, Int32, ParallelTaskPtr) __OMP_RTL(__kmpc_omp_taskwait, false, Int32, IdentPtr, Int32) +__OMP_RTL(__kmpc_omp_taskwait_51, false, Int32, IdentPtr, Int32, Int32) __OMP_RTL(__kmpc_omp_taskyield, false, Int32, IdentPtr, Int32, /* Int */ Int32) __OMP_RTL(__kmpc_push_num_threads, false, Void, IdentPtr, Int32, /* Int */ Int32) @@ -369,6 +370,8 @@ __OMP_RTL(__kmpc_omp_wait_deps, false, Void, IdentPtr, Int32, Int32, /* kmp_depend_info_t */ VoidPtr, Int32, VoidPtr) +__OMP_RTL(__kmpc_omp_taskwait_deps_51, false, Void, IdentPtr, Int32, Int32, + /* kmp_depend_info_t */ VoidPtr, Int32, VoidPtr, Int32) __OMP_RTL(__kmpc_cancellationpoint, false, Int32, IdentPtr, Int32, Int32) __OMP_RTL(__kmpc_fork_teams, true, Void, IdentPtr, Int32, ParallelTaskPtr) @@ -632,6 +635,8 @@ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ReadOnlyPtrAttrs)) __OMP_RTL_ATTRS(__kmpc_omp_taskwait, BarrierAttrs, AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_omp_taskwait_51, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) __OMP_RTL_ATTRS(__kmpc_omp_taskyield, InaccessibleArgOnlyAttrs, AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) __OMP_RTL_ATTRS(__kmpc_push_num_threads, InaccessibleArgOnlyAttrs, @@ -894,6 +899,9 @@ __OMP_RTL_ATTRS(__kmpc_omp_wait_deps, BarrierAttrs, AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_omp_taskwait_deps_51, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ReadOnlyPtrAttrs)) __OMP_RTL_ATTRS(__kmpc_cancellationpoint, DefaultAttrs, AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) 
Index: llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp =================================================================== --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -1249,23 +1249,34 @@ emitFlush(Loc); } -void OpenMPIRBuilder::emitTaskwaitImpl(const LocationDescription &Loc) { +void OpenMPIRBuilder::emitTaskwaitImpl(const LocationDescription &Loc, + int OpenMPVer, bool HaveNoWaitClause) { // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 // global_tid); uint32_t SrcLocStrSize; Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); - Value *Args[] = {Ident, getOrCreateThreadID(Ident)}; + if (OpenMPVer >= 51) { + Value *NoWaitClauseValue = ConstantInt::get(Int32, HaveNoWaitClause); + Value *Args[] = {Ident, getOrCreateThreadID(Ident), NoWaitClauseValue}; - // Ignore return result until untied tasks are supported. - Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskwait), - Args); + // Ignore return result until untied tasks are supported. + Builder.CreateCall( + getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskwait_51), Args); + } else { + Value *Args[] = {Ident, getOrCreateThreadID(Ident)}; + + // Ignore return result until untied tasks are supported. 
+ Builder.CreateCall( + getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskwait), Args); + } } -void OpenMPIRBuilder::createTaskwait(const LocationDescription &Loc) { +void OpenMPIRBuilder::createTaskwait(const LocationDescription &Loc, + int OpenMPVer, bool HaveNoWaitClause) { if (!updateToLocation(Loc)) return; - emitTaskwaitImpl(Loc); + emitTaskwaitImpl(Loc, OpenMPVer, HaveNoWaitClause); } void OpenMPIRBuilder::emitTaskyieldImpl(const LocationDescription &Loc) { @@ -2293,7 +2304,7 @@ case OMPScheduleType::BaseRuntimeSimd: assert(!ChunkSize && "schedule type does not support user-defined chunk sizes"); - [[fallthrough]]; + LLVM_FALLTHROUGH; case OMPScheduleType::BaseDynamicChunked: case OMPScheduleType::BaseGuidedChunked: case OMPScheduleType::BaseGuidedIterativeChunked: Index: openmp/runtime/src/dllexports =================================================================== --- openmp/runtime/src/dllexports +++ openmp/runtime/src/dllexports @@ -345,6 +345,7 @@ __kmpc_fork_teams 241 __kmpc_omp_task_with_deps 242 __kmpc_omp_wait_deps 243 + __kmpc_omp_taskwait_deps_51 288 __kmpc_cancel 244 __kmpc_cancellationpoint 245 __kmpc_cancel_barrier 246 Index: openmp/runtime/src/kmp.h =================================================================== --- openmp/runtime/src/kmp.h +++ openmp/runtime/src/kmp.h @@ -3946,6 +3946,10 @@ KMP_EXPORT kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task); KMP_EXPORT kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid); +/* __kmpc_omp_taskwait_51 : Function for OpenMP 5.1 nowait clause. 
+ * Placeholder for taskwait with nowait clause.*/ +KMP_EXPORT kmp_int32 __kmpc_omp_taskwait_51(ident_t *loc_ref, kmp_int32 gtid, + kmp_int32 have_no_wait); KMP_EXPORT kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid, int end_part); @@ -3970,6 +3974,15 @@ kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); +/* __kmpc_omp_taskwait_deps_51 : Function for OpenMP 5.1 nowait clause. + * Placeholder for taskwait with nowait clause.*/ +KMP_EXPORT void __kmpc_omp_taskwait_deps_51(ident_t *loc_ref, kmp_int32 gtid, + kmp_int32 ndeps, + kmp_depend_info_t *dep_list, + kmp_int32 ndeps_noalias, + kmp_depend_info_t *noalias_dep_list, + kmp_int32 have_no_wait = false); + extern kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task, bool serialize_immediate); Index: openmp/runtime/src/kmp_taskdeps.cpp =================================================================== --- openmp/runtime/src/kmp_taskdeps.cpp +++ openmp/runtime/src/kmp_taskdeps.cpp @@ -878,3 +878,149 @@ KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d finished waiting : loc=%p\n", gtid, loc_ref)); } + +/* __kmpc_omp_taskwait_deps_51 : Function for OpenMP 5.1 nowait clause. + * Placeholder for taskwait with nowait clause. 
+ * The code is a copy of + * __kmpc_omp_taskwait_deps.*/ +void __kmpc_omp_taskwait_deps_51(ident_t *loc_ref, kmp_int32 gtid, + kmp_int32 ndeps, kmp_depend_info_t *dep_list, + kmp_int32 ndeps_noalias, + kmp_depend_info_t *noalias_dep_list, + kmp_int32 have_no_wait) { + KA_TRACE(10, ("__kmpc_omp_taskwait_deps(enter): T#%d loc=%p nowait#%d\n", + gtid, loc_ref, have_no_wait)); + if (ndeps == 0 && ndeps_noalias == 0) { + KA_TRACE(10, ("__kmpc_omp_taskwait_deps(exit): T#%d has no dependences to " + "wait upon : loc=%p\n", + gtid, loc_ref)); + return; + } + __kmp_assert_valid_gtid(gtid); + kmp_info_t *thread = __kmp_threads[gtid]; + kmp_taskdata_t *current_task = thread->th.th_current_task; + +#if OMPT_SUPPORT + // this function represents a taskwait construct with depend clause + // We signal 4 events: + // - creation of the taskwait task + // - dependences of the taskwait task + // - schedule and finish of the taskwait task + ompt_data_t *taskwait_task_data = &thread->th.ompt_thread_info.task_data; + KMP_ASSERT(taskwait_task_data->ptr == NULL); + if (ompt_enabled.enabled) { + if (!current_task->ompt_task_info.frame.enter_frame.ptr) + current_task->ompt_task_info.frame.enter_frame.ptr = + OMPT_GET_FRAME_ADDRESS(0); + if (ompt_enabled.ompt_callback_task_create) { + ompt_callbacks.ompt_callback(ompt_callback_task_create)( + &(current_task->ompt_task_info.task_data), + &(current_task->ompt_task_info.frame), taskwait_task_data, + ompt_task_taskwait | ompt_task_undeferred | ompt_task_mergeable, 1, + OMPT_LOAD_OR_GET_RETURN_ADDRESS(gtid)); + } + } + +#if OMPT_OPTIONAL + /* OMPT grab all dependences if requested by the tool */ + if (ndeps + ndeps_noalias > 0 && ompt_enabled.ompt_callback_dependences) { + kmp_int32 i; + + int ompt_ndeps = ndeps + ndeps_noalias; + ompt_dependence_t *ompt_deps = (ompt_dependence_t *)KMP_OMPT_DEPS_ALLOC( + thread, (ndeps + ndeps_noalias) * sizeof(ompt_dependence_t)); + + KMP_ASSERT(ompt_deps != NULL); + + for (i = 0; i < ndeps; i++) { + 
ompt_deps[i].variable.ptr = (void *)dep_list[i].base_addr; + if (dep_list[i].flags.in && dep_list[i].flags.out) + ompt_deps[i].dependence_type = ompt_dependence_type_inout; + else if (dep_list[i].flags.out) + ompt_deps[i].dependence_type = ompt_dependence_type_out; + else if (dep_list[i].flags.in) + ompt_deps[i].dependence_type = ompt_dependence_type_in; + else if (dep_list[i].flags.mtx) + ompt_deps[ndeps + i].dependence_type = + ompt_dependence_type_mutexinoutset; + else if (dep_list[i].flags.set) + ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inoutset; + } + for (i = 0; i < ndeps_noalias; i++) { + ompt_deps[ndeps + i].variable.ptr = (void *)noalias_dep_list[i].base_addr; + if (noalias_dep_list[i].flags.in && noalias_dep_list[i].flags.out) + ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inout; + else if (noalias_dep_list[i].flags.out) + ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_out; + else if (noalias_dep_list[i].flags.in) + ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_in; + else if (noalias_dep_list[i].flags.mtx) + ompt_deps[ndeps + i].dependence_type = + ompt_dependence_type_mutexinoutset; + else if (noalias_dep_list[i].flags.set) + ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inoutset; + } + ompt_callbacks.ompt_callback(ompt_callback_dependences)( + taskwait_task_data, ompt_deps, ompt_ndeps); + /* We can now free the allocated memory for the dependences */ + /* For OMPD we might want to delay the free until end of this function */ + KMP_OMPT_DEPS_FREE(thread, ompt_deps); + ompt_deps = NULL; + } +#endif /* OMPT_OPTIONAL */ +#endif /* OMPT_SUPPORT */ + + // We can return immediately as: + // - dependences are not computed in serial teams (except with proxy tasks) + // - if the dephash is not yet created it means we have nothing to wait for + bool ignore = current_task->td_flags.team_serial || + current_task->td_flags.tasking_ser || + current_task->td_flags.final; + ignore = + ignore && 
thread->th.th_task_team != NULL && + thread->th.th_task_team->tt.tt_found_proxy_tasks == FALSE && + thread->th.th_task_team->tt.tt_hidden_helper_task_encountered == FALSE; + ignore = ignore || current_task->td_dephash == NULL; + + if (ignore) { + KA_TRACE(10, ("__kmpc_omp_taskwait_deps(exit): T#%d has no blocking " + "dependences : loc=%p\n", + gtid, loc_ref)); +#if OMPT_SUPPORT + __ompt_taskwait_dep_finish(current_task, taskwait_task_data); +#endif /* OMPT_SUPPORT */ + return; + } + + kmp_depnode_t node = {0}; + __kmp_init_node(&node); + + if (!__kmp_check_deps(gtid, &node, NULL, &current_task->td_dephash, + DEP_BARRIER, ndeps, dep_list, ndeps_noalias, + noalias_dep_list)) { + KA_TRACE(10, ("__kmpc_omp_taskwait_deps(exit): T#%d has no blocking " + "dependences : loc=%p\n", + gtid, loc_ref)); +#if OMPT_SUPPORT + __ompt_taskwait_dep_finish(current_task, taskwait_task_data); +#endif /* OMPT_SUPPORT */ + return; + } + + int thread_finished = FALSE; + kmp_flag_32<false, false> flag( + (std::atomic<kmp_uint32> *)&node.dn.npredecessors, 0U); + while (node.dn.npredecessors > 0) { + flag.execute_tasks(thread, gtid, FALSE, + &thread_finished USE_ITT_BUILD_ARG(NULL), + __kmp_task_stealing_constraint); + } + +#if OMPT_SUPPORT + __ompt_taskwait_dep_finish(current_task, taskwait_task_data); +#endif /* OMPT_SUPPORT */ + KA_TRACE(10, ("__kmpc_omp_taskwait_deps(exit): T#%d finished waiting : loc=%p\ + \n", + gtid, loc_ref)); + +} Index: openmp/runtime/src/kmp_tasking.cpp =================================================================== --- openmp/runtime/src/kmp_tasking.cpp +++ openmp/runtime/src/kmp_tasking.cpp @@ -2049,7 +2049,8 @@ template <bool ompt> static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid, void *frame_address, - void *return_address) { + void *return_address, + kmp_int32 have_no_wait) { kmp_taskdata_t *taskdata = nullptr; kmp_info_t *thread; int thread_finished = FALSE; @@ -2162,9 +2163,10 @@ OMPT_NOINLINE static kmp_int32 __kmpc_omp_taskwait_ompt(ident_t *loc_ref, kmp_int32
gtid, void *frame_address, - void *return_address) { + void *return_address, + kmp_int32 have_no_wait) { return __kmpc_omp_taskwait_template<true>(loc_ref, gtid, frame_address, - return_address); + return_address, have_no_wait); } #endif // OMPT_SUPPORT && OMPT_OPTIONAL @@ -2175,10 +2177,27 @@ if (UNLIKELY(ompt_enabled.enabled)) { OMPT_STORE_RETURN_ADDRESS(gtid); return __kmpc_omp_taskwait_ompt(loc_ref, gtid, OMPT_GET_FRAME_ADDRESS(0), - OMPT_LOAD_RETURN_ADDRESS(gtid)); + OMPT_LOAD_RETURN_ADDRESS(gtid), 0); } #endif - return __kmpc_omp_taskwait_template<false>(loc_ref, gtid, NULL, NULL); + return __kmpc_omp_taskwait_template<false>(loc_ref, gtid, NULL, NULL, 0); +} + +/* __kmpc_omp_taskwait_51 : Function for OpenMP 5.1 nowait clause. + * Placeholder for taskwait with nowait clause. + * The code is a copy of __kmpc_omp_taskwait.*/ +kmp_int32 __kmpc_omp_taskwait_51(ident_t *loc_ref, kmp_int32 gtid, + kmp_int32 have_no_wait) { +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (UNLIKELY(ompt_enabled.enabled)) { + OMPT_STORE_RETURN_ADDRESS(gtid); + return __kmpc_omp_taskwait_ompt(loc_ref, gtid, OMPT_GET_FRAME_ADDRESS(0), + OMPT_LOAD_RETURN_ADDRESS(gtid), + have_no_wait); + } +#endif + return __kmpc_omp_taskwait_template<false>(loc_ref, gtid, NULL, NULL, + have_no_wait); } // __kmpc_omp_taskyield: switch to a different task