Index: include/clang/AST/StmtOpenMP.h =================================================================== --- include/clang/AST/StmtOpenMP.h +++ include/clang/AST/StmtOpenMP.h @@ -128,6 +128,10 @@ operator bool() { return Current != End; } }; + /// \brief Gets single clause of the specified kind \a K associated with the + /// current directive iff there is only one clause of this kind. + const OMPClause *getSingleClause(OpenMPClauseKind K) const; + /// \brief Returns starting location of directive kind. SourceLocation getLocStart() const { return StartLoc; } /// \brief Returns ending location of directive. Index: lib/AST/Stmt.cpp =================================================================== --- lib/AST/Stmt.cpp +++ lib/AST/Stmt.cpp @@ -1350,6 +1350,21 @@ return new (Mem) OMPFlushClause(N); } +const OMPClause * +OMPExecutableDirective::getSingleClause(OpenMPClauseKind K) const { + auto ClauseFilter = + [=](const OMPClause *C) -> bool { return C->getClauseKind() == K; }; + OMPExecutableDirective::filtered_clause_iterator I( + clauses(), ClauseFilter); + + if (I) { + auto PrevI = I; + assert(!++I && "There is at least 2 clauses of the specified kind"); + return *PrevI; + } + return nullptr; +} + OMPParallelDirective *OMPParallelDirective::Create( const ASTContext &C, SourceLocation StartLoc, Index: lib/CodeGen/CGOpenMPRuntime.h =================================================================== --- lib/CodeGen/CGOpenMPRuntime.h +++ lib/CodeGen/CGOpenMPRuntime.h @@ -68,7 +68,13 @@ // microtask, ...); OMPRTL__kmpc_fork_call, // Call to kmp_int32 kmpc_global_thread_num(ident_t *loc); - OMPRTL__kmpc_global_thread_num + OMPRTL__kmpc_global_thread_num, + // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 + // global_tid); + OMPRTL__kmpc_serialized_parallel, + // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 + // global_tid); + OMPRTL__kmpc_end_serialized_parallel }; private: Index: lib/CodeGen/CGOpenMPRuntime.cpp =================================================================== --- lib/CodeGen/CGOpenMPRuntime.cpp +++ lib/CodeGen/CGOpenMPRuntime.cpp @@ -127,32 +127,26 @@ OpenMPGtidMapTy::iterator I = OpenMPGtidMap.find(CGF.CurFn); if (I != OpenMPGtidMap.end()) { GTid = I->second; + } else if (auto OMPRegionInfo = + dyn_cast_or_null( + CGF.CapturedStmtInfo)) { + assert(OMPRegionInfo->getGTidVariable() != nullptr && + "No GTid variable for OpenMP region."); + auto GTidVar = OMPRegionInfo->getGTidVariable(); + auto LVal = CGF.MakeNaturalAlignAddrLValue( + CGF.GetAddrOfLocalVar(GTidVar), + CGF.getContext().getPointerType(GTidVar->getType())); + auto RVal = CGF.EmitLoadOfLValue(LVal, SourceLocation()); + LVal = + CGF.MakeNaturalAlignAddrLValue(RVal.getScalarVal(), GTidVar->getType()); + GTid = CGF.EmitLoadOfLValue(LVal, SourceLocation()).getScalarVal(); } else { - // Check if current function is a function which has first parameter - // with type int32 and name ".global_tid.". - if (!CGF.CurFn->arg_empty() && - CGF.CurFn->arg_begin()->getType()->isPointerTy() && - CGF.CurFn->arg_begin() - ->getType() - ->getPointerElementType() - ->isIntegerTy() && - CGF.CurFn->arg_begin() - ->getType() - ->getPointerElementType() - ->getIntegerBitWidth() == 32 && - CGF.CurFn->arg_begin()->hasName() && - CGF.CurFn->arg_begin()->getName() == ".global_tid.") { - CGBuilderTy::InsertPointGuard IPG(CGF.Builder); - CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); - GTid = CGF.Builder.CreateLoad(CGF.CurFn->arg_begin()); - } else { - // Generate "int32 .kmpc_global_thread_num.addr;" - CGBuilderTy::InsertPointGuard IPG(CGF.Builder); - CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); - llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc)}; - GTid = CGF.EmitRuntimeCall( - CreateRuntimeFunction(OMPRTL__kmpc_global_thread_num), Args); - } + // Generate "int32 .kmpc_global_thread_num.addr;" + CGBuilderTy::InsertPointGuard IPG(CGF.Builder); + CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); + llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc)}; + GTid = CGF.EmitRuntimeCall( + CreateRuntimeFunction(OMPRTL__kmpc_global_thread_num), Args); OpenMPGtidMap[CGF.CurFn] = GTid; } return GTid; @@ -184,7 +178,7 @@ llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, getKmpc_MicroPointerTy()}; llvm::FunctionType *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, true); + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); break; } @@ -192,10 +186,29 @@ // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; llvm::FunctionType *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, TypeParams, false); + llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); break; } + case OMPRTL__kmpc_serialized_parallel: { + // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 + // global_tid); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); + break; + } + case OMPRTL__kmpc_end_serialized_parallel: { + // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 + // global_tid); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); + break; + } } return RTLFn; } + Index: lib/CodeGen/CGStmtOpenMP.cpp =================================================================== --- lib/CodeGen/CGStmtOpenMP.cpp +++ lib/CodeGen/CGStmtOpenMP.cpp @@ -16,6 +16,7 @@ #include "CodeGenModule.h" #include "clang/AST/Stmt.h" #include "clang/AST/StmtOpenMP.h" +#include "llvm/IR/CallSite.h" using namespace clang; using namespace CodeGen; @@ -23,29 +24,137 @@ // OpenMP Directive Emission //===----------------------------------------------------------------------===// +static void EmitOMPParallelCall(CodeGenFunction &CGF, + const OMPParallelDirective &S, + llvm::Value *OutlinedFn, + llvm::Value *CapturedStruct) { + // Build call __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/) + llvm::Value *Args[] = { + CGF.CGM.getOpenMPRuntime().EmitOpenMPUpdateLocation(CGF, S.getLocStart()), + CGF.Builder.getInt32(1), // Number of arguments after 'microtask' argument + // (there is only one additional argument - 'context') + CGF.Builder.CreateBitCast( + OutlinedFn, CGF.CGM.getOpenMPRuntime().getKmpc_MicroPointerTy()), + CGF.EmitCastToVoidPtr(CapturedStruct)}; + auto RTLFn = CGF.CGM.getOpenMPRuntime().CreateRuntimeFunction( + CGOpenMPRuntime::OMPRTL__kmpc_fork_call); + CGF.EmitRuntimeCall(RTLFn, Args); +} + +static llvm::Value *EmitGTidAddress(CodeGenFunction &CGF, llvm::Value *GTid) { + auto Int32Ty = + CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); + if (auto OMPRegionInfo = + dyn_cast_or_null( + CGF.CapturedStmtInfo)) { + assert(OMPRegionInfo->getGTidVariable() != nullptr && + "No GTid in OpenMP region."); + auto LVal = CGF.MakeNaturalAlignAddrLValue( + CGF.GetAddrOfLocalVar(OMPRegionInfo->getGTidVariable()), + CGF.getContext().getPointerType(Int32Ty)); + return CGF.EmitLoadOfLValue(LVal, SourceLocation()).getScalarVal(); + } + auto GTidTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".gtid_temp."); + CGF.EmitStoreOfScalar(GTid, + CGF.MakeNaturalAlignAddrLValue(GTidTemp, Int32Ty)); + + return GTidTemp; +} + +static void EmitOMPSerialCall(CodeGenFunction &CGF, + const OMPParallelDirective &S, + llvm::Value *OutlinedFn, + llvm::Value *CapturedStruct) { + auto Int32Ty = + CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); + auto Zero = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".zero."); + CGF.InitTempAlloca(Zero, CGF.Builder.getInt32(/*C*/ 0)); + auto GTid = + CGF.CGM.getOpenMPRuntime().GetOpenMPGlobalThreadNum(CGF, S.getLocStart()); + llvm::Value *GTidAddr = EmitGTidAddress(CGF, GTid); + // Build calls: + // __kmpc_serialized_parallel(&StartLoc, GTid); + llvm::Value *SerArgs[] = { + CGF.CGM.getOpenMPRuntime().EmitOpenMPUpdateLocation(CGF, S.getLocStart()), + GTid}; + auto RTLFn = CGF.CGM.getOpenMPRuntime().CreateRuntimeFunction( + CGOpenMPRuntime::OMPRTL__kmpc_serialized_parallel); + CGF.EmitRuntimeCall(RTLFn, SerArgs); + // OutlinedFn(>id, &zero, CapturedStruct); + llvm::Value *OutlinedFnArgs[] = {GTidAddr, Zero, CapturedStruct}; + CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs); + // __kmpc_end_serialized_parallel(&EndLoc, GTid); + llvm::Value *EndSerArgs[] = { + CGF.CGM.getOpenMPRuntime().EmitOpenMPUpdateLocation(CGF, S.getLocEnd()), + GTid}; + RTLFn = CGF.CGM.getOpenMPRuntime().CreateRuntimeFunction( + CGOpenMPRuntime::OMPRTL__kmpc_end_serialized_parallel); + CGF.EmitRuntimeCall(RTLFn, EndSerArgs); +} + +template +static void EmitConditionalCode(CodeGenFunction &CGF, const Expr *Cond, + CodeGenTy CodeGen) { + CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); + + // If the condition constant folds and can be elided, try to avoid emitting + // the condition and the dead arm of the if/else. + bool CondConstant; + if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { + CodeGen(CondConstant); + return; + } + + // Otherwise, the condition did not fold, or we couldn't elide it. Just + // emit the conditional branch. + auto ThenBlock = CGF.createBasicBlock(/*name*/ "omp_if.then"); + auto ElseBlock = CGF.createBasicBlock(/*name*/ "omp_if.else"); + auto ContBlock = CGF.createBasicBlock(/*name*/ "omp_if.end"); + CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount*/ 0); + + // Emit the 'then' code. + CGF.EmitBlock(ThenBlock); + CodeGen(/*ThenBlock*/ true); + CGF.EmitBranch(ContBlock); + // Emit the 'else' code if present. + { + // There is no need to emit line number for unconditional branch. + SuppressDebugLocation SDL(CGF.Builder); + CGF.EmitBlock(ElseBlock); + } + CodeGen(/*ThenBlock*/ false); + { + // There is no need to emit line number for unconditional branch. + SuppressDebugLocation SDL(CGF.Builder); + CGF.EmitBranch(ContBlock); + } + // Emit the continuation block for code after the if. + CGF.EmitBlock(ContBlock, /*IsFinished*/ true); +} + void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { const CapturedStmt *CS = cast(S.getAssociatedStmt()); llvm::Value *CapturedStruct = GenerateCapturedStmtArgument(*CS); llvm::Value *OutlinedFn; { - CodeGenFunction CGF(CGM, true); - CGCapturedStmtInfo CGInfo(*CS, CS->getCapturedRegionKind()); + CodeGenFunction CGF(CGM, /*suppressNewContext*/ true); + CGOpenMPRegionInfo CGInfo(*CS, *CS->getCapturedDecl()->param_begin()); CGF.CapturedStmtInfo = &CGInfo; OutlinedFn = CGF.GenerateCapturedStmtFunction(*CS); } - // Build call __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/) - llvm::Value *Args[] = { - CGM.getOpenMPRuntime().EmitOpenMPUpdateLocation(*this, S.getLocStart()), - Builder.getInt32(1), // Number of arguments after 'microtask' argument - // (there is only one additional argument - 'context') - Builder.CreateBitCast(OutlinedFn, - CGM.getOpenMPRuntime().getKmpc_MicroPointerTy()), - EmitCastToVoidPtr(CapturedStruct)}; - llvm::Constant *RTLFn = CGM.getOpenMPRuntime().CreateRuntimeFunction( - CGOpenMPRuntime::OMPRTL__kmpc_fork_call); - EmitRuntimeCall(RTLFn, Args); + if (auto C = S.getSingleClause(/*K*/ OMPC_if)) { + auto Cond = cast(C)->getCondition(); + EmitConditionalCode(*this, Cond, [&](bool ThenBlock) { + if (ThenBlock) + EmitOMPParallelCall(*this, S, OutlinedFn, CapturedStruct); + else + EmitOMPSerialCall(*this, S, OutlinedFn, CapturedStruct); + }); + } else { + EmitOMPParallelCall(*this, S, OutlinedFn, CapturedStruct); + } } void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { @@ -135,4 +244,3 @@ void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &) { llvm_unreachable("CodeGen for 'omp atomic' is not supported yet."); } - Index: lib/CodeGen/CodeGenFunction.h =================================================================== --- lib/CodeGen/CodeGenFunction.h +++ lib/CodeGen/CodeGenFunction.h @@ -224,6 +224,8 @@ /// \brief Get the name of the capture helper. virtual StringRef getHelperName() const { return "__captured_stmt"; } + static bool classof(const CGCapturedStmtInfo *) { return true; } + private: /// \brief The kind of captured statement being generated. CapturedRegionKind Kind; @@ -238,6 +240,27 @@ /// \brief Captured 'this' type. FieldDecl *CXXThisFieldDecl; }; + + /// \brief API for captured statement code generation in OpenMP constructs. + class CGOpenMPRegionInfo : public CGCapturedStmtInfo { + public: + CGOpenMPRegionInfo(const CapturedStmt &S, const VarDecl *GTidVar) + : CGCapturedStmtInfo(S, CR_OpenMP), GTidVar(GTidVar) {} + + /// \brief Gets a variable or parameter for storing global thread id + /// inside OpenMP construct. + const VarDecl *getGTidVariable() const { return GTidVar; } + + static bool classof(const CGCapturedStmtInfo *Info) { + return Info->getKind() == CR_OpenMP; + } + + private: + /// \brief A variable or parameter storing global thread id for OpenMP + /// constructs. + const VarDecl *GTidVar; + }; + CGCapturedStmtInfo *CapturedStmtInfo; /// BoundsChecking - Emit run-time bounds checks. Higher values mean Index: test/OpenMP/parallel_if_codegen.cpp =================================================================== --- test/OpenMP/parallel_if_codegen.cpp +++ test/OpenMP/parallel_if_codegen.cpp @@ -0,0 +1,124 @@ +// RUN: %clang_cc1 -verify -fopenmp=libiomp5 -x c++ -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -triple x86_64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix=CHECK %s +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +void fn1(); +void fn2(); +void fn3(); +void fn4(); +void fn5(); +void fn6(); + +int Arg; + +// CHECK-LABEL: define void @{{.+}}gtid_test +void gtid_test() { +// CHECK: call void {{.+}}* @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 1, {{.+}}* [[GTID_TEST_REGION1:@.+]] to void +#pragma omp parallel +#pragma omp parallel if (false) + gtid_test(); +// CHECK: ret void +} + +// CHECK: define internal void [[GTID_TEST_REGION1]](i{{.+}}* [[GTID_PARAM:%.+]], i +// CHECK: store i{{[0-9]+}}* [[GTID_PARAM]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]], +// CHECK: [[GTID_ADDR:%.+]] = load i{{[0-9]+}}** [[GTID_ADDR_REF]] +// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}* [[GTID_ADDR]] +// CHECK: [[GTID_ADDR:%.+]] = load i{{[0-9]+}}** [[GTID_ADDR_REF]] +// CHECK: call void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]]) +// CHECK: call void [[GTID_TEST_REGION2:@.+]](i{{[0-9]+}}* [[GTID_ADDR]] +// CHECK: call void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]]) +// CHECK: ret void + +// CHECK: define internal void [[GTID_TEST_REGION2]]( +// CHECK: call void @{{.+}}gtid_test +// CHECK: ret void + +template +int tmain(T Arg) { +#pragma omp parallel if (true) + fn1(); +#pragma omp parallel if (false) + fn2(); +#pragma omp parallel if (Arg) + fn3(); + return 0; +} + +// CHECK-LABEL: define {{[a-z]*[ ]?i32}} @main() +int main() { +// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num( +// CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 1, void {{.+}}* [[CAP_FN4:@.+]] to void +#pragma omp parallel if (true) + fn4(); +// CHECK: store i32 [[GTID]], i32* [[GTID_ADDR:%.+]], +// CHECK: call void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]]) +// CHECK: call void [[CAP_FN5:@.+]](i32* [[GTID_ADDR]], +// CHECK: call void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]]) +#pragma omp parallel if (false) + fn5(); + +// CHECK: br i1 %{{.+}}, label %[[OMP_THEN:.+]], label %[[OMP_ELSE:.+]] +// CHECK: [[OMP_THEN]]: +// CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 1, void {{.+}}* [[CAP_FN6:@.+]] to void +// CHECK: br label %[[OMP_END:.+]] +// CHECK: [[OMP_ELSE]]: +// CHECK: store i32 [[GTID]], i32* [[GTID_ADDR:%.+]], +// CHECK: call void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i32 %0) +// CHECK: call void [[CAP_FN6]](i32* [[GTID_ADDR]], +// CHECK: call void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]]) +// CHECK: br label %[[OMP_END]] +// CHECK: [[OMP_END]]: +#pragma omp parallel if (Arg) + fn6(); + // CHECK: = call i{{.+}} @{{.+}}tmain + return tmain(Arg); +} + +// CHECK: define internal void [[CAP_FN4]] +// CHECK: call void @{{.+}}fn4 +// CHECK: ret void + +// CHECK: define internal void [[CAP_FN5]] +// CHECK: call void @{{.+}}fn5 +// CHECK: ret void + +// CHECK: define internal void [[CAP_FN6]] +// CHECK: call void @{{.+}}fn6 +// CHECK: ret void + +// CHECK-LABEL: define {{.+}} @{{.+}}tmain +// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num( +// CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 1, void {{.+}}* [[CAP_FN1:@.+]] to void +// CHECK: store i32 [[GTID]], i32* [[GTID_ADDR:%.+]], +// CHECK: call void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]]) +// CHECK: call void [[CAP_FN2:@.+]](i32* [[GTID_ADDR]], +// CHECK: call void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]]) +// CHECK: br i1 %{{.+}}, label %[[OMP_THEN:.+]], label %[[OMP_ELSE:.+]] +// CHECK: [[OMP_THEN]]: +// CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 1, void {{.+}}* [[CAP_FN3:@.+]] to void +// CHECK: br label %[[OMP_END:.+]] +// CHECK: [[OMP_ELSE]]: +// CHECK: store i32 [[GTID]], i32* [[GTID_ADDR:%.+]], +// CHECK: call void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i32 %0) +// CHECK: call void [[CAP_FN3]](i32* [[GTID_ADDR]], +// CHECK: call void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]]) +// CHECK: br label %[[OMP_END]] +// CHECK: [[OMP_END]]: + +// CHECK: define internal void [[CAP_FN1]] +// CHECK: call void @{{.+}}fn1 +// CHECK: ret void + +// CHECK: define internal void [[CAP_FN2]] +// CHECK: call void @{{.+}}fn2 +// CHECK: ret void + +// CHECK: define internal void [[CAP_FN3]] +// CHECK: call void @{{.+}}fn3 +// CHECK: ret void + +#endif