Index: lib/CodeGen/CGOpenMPRuntime.h =================================================================== --- lib/CodeGen/CGOpenMPRuntime.h +++ lib/CodeGen/CGOpenMPRuntime.h @@ -44,6 +44,9 @@ class CodeGenModule; typedef llvm::function_ref RegionCodeGenTy; +typedef llvm::function_ref)> + IfCodeGenTy; class CGOpenMPRuntime { private: @@ -118,6 +121,12 @@ // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, // kmp_critical_name *lck); OMPRTL__kmpc_end_reduce_nowait, + // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, + // kmp_task_t * new_task); + OMPRTL__kmpc_omp_task_begin_if0, + // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, + // kmp_task_t * new_task); + OMPRTL__kmpc_omp_task_complete_if0, }; /// \brief Values for bit flags used in the ident_t to describe the fields. @@ -522,10 +531,14 @@ /// \param SharedsTy A type which contains references the shared variables. /// \param Shareds Context with the list of shared variables from the \a /// TaskFunction. + /// \param IfCond Not a nullptr if 'if' clause was specified, nullptr + /// otherwise. + /// \param IfClauseGen A codegen procedure for 'if' clause. virtual void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, bool Tied, llvm::PointerIntPair Final, llvm::Value *TaskFunction, QualType SharedsTy, - llvm::Value *Shareds); + llvm::Value *Shareds, const Expr *IfCond, + const IfCodeGenTy &IfClauseCodeGen); /// \brief Emit code for the directive that does not require outlining. /// Index: lib/CodeGen/CGOpenMPRuntime.cpp =================================================================== --- lib/CodeGen/CGOpenMPRuntime.cpp +++ lib/CodeGen/CGOpenMPRuntime.cpp @@ -710,6 +710,28 @@ CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait"); break; } + case OMPRTL__kmpc_omp_task_begin_if0: { + // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t + // *new_task); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, + CGM.VoidPtrTy}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = + CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); + break; + } + case OMPRTL__kmpc_omp_task_complete_if0: { + // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t + // *new_task); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, + CGM.VoidPtrTy}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, + /*Name=*/"__kmpc_omp_task_complete_if0"); + break; + } } return RTLFn; } @@ -1555,7 +1577,8 @@ void CGOpenMPRuntime::emitTaskCall( CodeGenFunction &CGF, SourceLocation Loc, bool Tied, llvm::PointerIntPair Final, - llvm::Value *TaskFunction, QualType SharedsTy, llvm::Value *Shareds) { + llvm::Value *TaskFunction, QualType SharedsTy, llvm::Value *Shareds, + const Expr *IfCond, const IfCodeGenTy &IfClauseCodeGen) { auto &C = CGM.getContext(); auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); // Build type kmp_routine_entry_t (if not built yet). @@ -1618,15 +1641,44 @@ CGF.Builder.CreateStructGEP(KmpTaskTTy, NewTaskNewTaskTTy, /*Idx=*/KmpTaskTDestructors), CGM.PointerAlignInBytes); - // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc() // libcall. // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t // *new_task); - llvm::Value *TaskArgs[] = {emitUpdateLocation(CGF, Loc), - getThreadID(CGF, Loc), NewTask}; - // TODO: add check for untied tasks. - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); + auto *ThreadID = getThreadID(CGF, Loc); + llvm::Value *TaskArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID, NewTask}; + auto &&ThenCodeGen = [this, &TaskArgs](CodeGenFunction &CGF) { + // TODO: add check for untied tasks. + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); + }; + auto &&ElseCodeGen = + [this, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry]( + CodeGenFunction &CGF) { + CodeGenFunction::RunCleanupsScope LocalScope(CGF); + CGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs); + // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, + // kmp_task_t *new_task); + CGF.EHStack.pushCleanup( + NormalAndEHCleanup, + createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), + llvm::makeArrayRef(TaskArgs)); + + // Call proxy_task_entry(gtid, new_task); + llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; + CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs); + }; + if (IfCond) { + IfClauseCodeGen(CGF, IfCond, [&](bool ThenBlock) { + if (ThenBlock) { + ThenCodeGen(CGF); + } else { + ElseCodeGen(CGF); + } + }); + } else { + ThenCodeGen(CGF); + } } static llvm::Value *emitReductionFunction(CodeGenModule &CGM, Index: lib/CodeGen/CGStmtOpenMP.cpp =================================================================== --- lib/CodeGen/CGStmtOpenMP.cpp +++ lib/CodeGen/CGStmtOpenMP.cpp @@ -30,8 +30,8 @@ /// } else { /// CodeGen(false); /// } -static void EmitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, - const std::function &CodeGen) { +static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, + const llvm::function_ref &CodeGen) { CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); // If the condition constant folds and can be elided, try to avoid emitting @@ -505,7 +505,7 @@ S, *CS->getCapturedDecl()->param_begin(), CodeGen); if (auto C = S.getSingleClause(/*K*/ OMPC_if)) { auto Cond = cast(C)->getCondition(); - EmitOMPIfClause(CGF, Cond, [&](bool ThenBlock) { + emitOMPIfClause(CGF, Cond, [&](bool ThenBlock) { if (ThenBlock) emitOMPParallelCall(CGF, S, OutlinedFn, CapturedStruct); else @@ -1344,8 +1344,13 @@ Final.setInt(/*IntVal=*/false); } auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); + const Expr *IfCond = nullptr; + if (auto C = S.getSingleClause(OMPC_if)) { + IfCond = cast(C)->getCondition(); + } CGM.getOpenMPRuntime().emitTaskCall(*this, S.getLocStart(), Tied, Final, - OutlinedFn, SharedsTy, CapturedStruct); + OutlinedFn, SharedsTy, CapturedStruct, + IfCond, emitOMPIfClause); } void CodeGenFunction::EmitOMPTaskyieldDirective( Index: test/OpenMP/task_if_codegen.cpp =================================================================== --- test/OpenMP/task_if_codegen.cpp +++ test/OpenMP/task_if_codegen.cpp @@ -0,0 +1,133 @@ +// RUN: %clang_cc1 -verify -fopenmp=libiomp5 -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix=CHECK %s +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +void fn1(); +void fn2(); +void fn3(); +void fn4(); +void fn5(); +void fn6(); + +int Arg; + +// CHECK-LABEL: define void @{{.+}}gtid_test +void gtid_test() { +// CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 1, {{.+}}* [[GTID_TEST_REGION1:@.+]] to void +#pragma omp parallel +#pragma omp task if (false) + gtid_test(); +// CHECK: ret void +} + +// CHECK: define internal void [[GTID_TEST_REGION1]](i32* [[GTID_PARAM:%.+]], i +// CHECK: store i32* [[GTID_PARAM]], i32** [[GTID_ADDR_REF:%.+]], +// CHECK: [[GTID_ADDR:%.+]] = load i32*, i32** [[GTID_ADDR_REF]] +// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_ADDR]] +// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc( +// CHECK: [[TASK_PTR:%.+]] = bitcast i8* [[ORIG_TASK_PTR]] to +// CHECK: call void @__kmpc_omp_task_begin_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]]) +// CHECK: call i32 [[GTID_TEST_REGION2:@.+]](i32 [[GTID]], %{{.+}}* [[TASK_PTR]]) +// CHECK: call void @__kmpc_omp_task_complete_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]]) +// CHECK: ret void + +// CHECK: define internal i32 [[GTID_TEST_REGION2]]( +// CHECK: call void @{{.+}}gtid_test +// CHECK: ret i32 + +template +int tmain(T Arg) { +#pragma omp task if (true) + fn1(); +#pragma omp task if (false) + fn2(); +#pragma omp task if (Arg) + fn3(); + return 0; +} + +// CHECK-LABEL: @main +int main() { +// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num( +// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc({{[^,]+}}, i32 [[GTID]], i32 1, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[CAP_FN4:[^ ]+]] to i32 (i32, i8*)*)) +// CHECK: call i32 @__kmpc_omp_task(%{{.+}}* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]]) +#pragma omp task if (true) + fn4(); +// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc( +// CHECK: [[TASK_PTR:%.+]] = bitcast i8* [[ORIG_TASK_PTR]] to +// CHECK: call void @__kmpc_omp_task_begin_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]]) +// CHECK: call i32 [[CAP_FN5:@.+]](i32 [[GTID]], %{{.+}}* [[TASK_PTR]]) +// CHECK: call void @__kmpc_omp_task_complete_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]]) +#pragma omp task if (false) + fn5(); + +// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc({{[^,]+}}, i32 [[GTID]], i32 1, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[CAP_FN6:[^ ]+]] to i32 (i32, i8*)*)) +// CHECK: [[TASK_PTR:%.+]] = bitcast i8* [[ORIG_TASK_PTR]] to +// CHECK: br i1 %{{.+}}, label %[[OMP_THEN:.+]], label %[[OMP_ELSE:.+]] +// CHECK: [[OMP_THEN]] +// CHECK: call i32 @__kmpc_omp_task(%{{.+}}* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]]) +// CHECK: br label %[[OMP_END:.+]] +// CHECK: [[OMP_ELSE]] +// CHECK: call void @__kmpc_omp_task_begin_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]]) +// CHECK: call i32 [[CAP_FN6:@.+]](i32 [[GTID]], %{{.+}}* [[TASK_PTR]]) +// CHECK: call void @__kmpc_omp_task_complete_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]]) +// CHECK: br label %[[OMP_END]] +// CHECK: [[OMP_END]] +#pragma omp task if (Arg) + fn6(); + // CHECK: = call {{.*}}i{{.+}} @{{.+}}tmain + return tmain(Arg); +} + +// CHECK: define internal i32 [[CAP_FN4]] +// CHECK: call void @{{.+}}fn4 +// CHECK: ret i32 + +// CHECK: define internal i32 [[CAP_FN5]] +// CHECK: call void @{{.+}}fn5 +// CHECK: ret i32 + +// CHECK: define internal i32 [[CAP_FN6]] +// CHECK: call void @{{.+}}fn6 +// CHECK: ret i32 + +// CHECK-LABEL: define {{.+}} @{{.+}}tmain +// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num( +// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^,]+}}, i32 [[GTID]], i32 1, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[CAP_FN1:[^ ]+]] to i32 (i32, i8*)*)) +// CHECK: call i32 @__kmpc_omp_task(%{{.+}}* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]]) + +// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc( +// CHECK: [[TASK_PTR:%.+]] = bitcast i8* [[ORIG_TASK_PTR]] to +// CHECK: call void @__kmpc_omp_task_begin_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]]) +// CHECK: call i32 [[CAP_FN2:@.+]](i32 [[GTID]], %{{.+}}* [[TASK_PTR]]) +// CHECK: call void @__kmpc_omp_task_complete_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]]) + +// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^,]+}}, i32 [[GTID]], i32 1, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[CAP_FN3:[^ ]+]] to i32 (i32, i8*)*)) +// CHECK: [[TASK_PTR:%.+]] = bitcast i8* [[ORIG_TASK_PTR]] to +// CHECK: br i1 %{{.+}}, label %[[OMP_THEN:.+]], label %[[OMP_ELSE:.+]] +// CHECK: [[OMP_THEN]] +// CHECK: call i32 @__kmpc_omp_task(%{{.+}}* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]]) +// CHECK: br label %[[OMP_END:.+]] +// CHECK: [[OMP_ELSE]] +// CHECK: call void @__kmpc_omp_task_begin_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]]) +// CHECK: call i32 [[CAP_FN3:@.+]](i32 [[GTID]], %{{.+}}* [[TASK_PTR]]) +// CHECK: call void @__kmpc_omp_task_complete_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]]) +// CHECK: br label %[[OMP_END]] +// CHECK: [[OMP_END]] + +// CHECK: define internal i32 [[CAP_FN1]] +// CHECK: call void @{{.+}}fn1 +// CHECK: ret i32 + +// CHECK: define internal i32 [[CAP_FN2]] +// CHECK: call void @{{.+}}fn2 +// CHECK: ret i32 + +// CHECK: define internal i32 [[CAP_FN3]] +// CHECK: call void @{{.+}}fn3 +// CHECK: ret i32 + +#endif