Index: include/clang/AST/StmtOpenMP.h =================================================================== --- include/clang/AST/StmtOpenMP.h +++ include/clang/AST/StmtOpenMP.h @@ -128,6 +128,12 @@ operator bool() { return Current != End; } }; + /// \brief Gets a single clause of the specified kind \a K associated with the + /// current directive iff there is only one clause of this kind (and assertion + /// is fired if there is more than one clause is associated with the + /// directive). + const OMPClause *getSingleClause(OpenMPClauseKind K) const; + /// \brief Returns starting location of directive kind. SourceLocation getLocStart() const { return StartLoc; } /// \brief Returns ending location of directive. Index: lib/AST/Stmt.cpp =================================================================== --- lib/AST/Stmt.cpp +++ lib/AST/Stmt.cpp @@ -1384,6 +1384,21 @@ return new (Mem) OMPFlushClause(N); } +const OMPClause * +OMPExecutableDirective::getSingleClause(OpenMPClauseKind K) const { + auto ClauseFilter = + [=](const OMPClause *C) -> bool { return C->getClauseKind() == K; }; + OMPExecutableDirective::filtered_clause_iterator I( + clauses(), ClauseFilter); + + if (I) { + auto Clause = *I; + assert(!++I && "There are at least 2 clauses of the specified kind"); + return Clause; + } + return nullptr; +} + OMPParallelDirective *OMPParallelDirective::Create( const ASTContext &C, SourceLocation StartLoc, Index: lib/CodeGen/CGOpenMPRuntime.h =================================================================== --- lib/CodeGen/CGOpenMPRuntime.h +++ lib/CodeGen/CGOpenMPRuntime.h @@ -76,7 +76,10 @@ OMPRTL__kmpc_critical, // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, // kmp_critical_name *crit); - OMPRTL__kmpc_end_critical + OMPRTL__kmpc_end_critical, + // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, + // kmp_int32 num_threads); + OMPRTL__kmpc_push_num_threads }; private: @@ -208,6 +211,15 @@ virtual void EmitOMPCriticalRegionEnd(CodeGenFunction &CGF, llvm::Value *RegionLock, SourceLocation Loc); + + /// \brief Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 + /// global_tid, kmp_int32 num_threads); + /// \param CGF Reference to current CodeGenFunction. + /// \param NumThreads An integer value of threads. + /// \param Loc Location of the construct. + virtual void EmitOMPNumThreadsClause(CodeGenFunction &CGF, + llvm::Value *NumThreads, + SourceLocation Loc); }; } // namespace CodeGen } // namespace clang Index: lib/CodeGen/CGOpenMPRuntime.cpp =================================================================== --- lib/CodeGen/CGOpenMPRuntime.cpp +++ lib/CodeGen/CGOpenMPRuntime.cpp @@ -219,6 +219,16 @@ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); break; } + case OMPRTL__kmpc_push_num_threads: { + // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, + // kmp_int32 num_threads) + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, + CGM.Int32Ty}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); + break; + } } return RTLFn; } @@ -260,3 +270,16 @@ CreateRuntimeFunction(CGOpenMPRuntime::OMPRTL__kmpc_end_critical); CGF.EmitRuntimeCall(RTLFn, Args); } + +void CGOpenMPRuntime::EmitOMPNumThreadsClause(CodeGenFunction &CGF, + llvm::Value *NumThreads, + SourceLocation Loc) { + // __kmpc_push_num_threads(&loc, global_tid, num_threads); + llvm::Value *Args[] = { + EmitOpenMPUpdateLocation(CGF, Loc), GetOpenMPGlobalThreadNum(CGF, Loc), + CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; + llvm::Constant *RTLFn = CGF.CGM.getOpenMPRuntime().CreateRuntimeFunction( + CGOpenMPRuntime::OMPRTL__kmpc_push_num_threads); + CGF.EmitRuntimeCall(RTLFn, Args); +} + Index: lib/CodeGen/CGStmtOpenMP.cpp =================================================================== --- lib/CodeGen/CGStmtOpenMP.cpp +++ lib/CodeGen/CGStmtOpenMP.cpp @@ -34,6 +34,14 @@ CGF.CapturedStmtInfo = &CGInfo; OutlinedFn = CGF.GenerateCapturedStmtFunction(*CS); } + if (auto C = S.getSingleClause(/*K*/ OMPC_num_threads)) { + RunCleanupsScope NumThreadsScope(*this); + auto NumThreadsClause = cast(C); + auto NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(), + /*IgnoreResultAssign*/ true); + CGM.getOpenMPRuntime().EmitOMPNumThreadsClause( + *this, NumThreads, NumThreadsClause->getLocStart()); + } // Build call __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/) llvm::Value *Args[] = { Index: lib/Sema/SemaOpenMP.cpp =================================================================== --- lib/Sema/SemaOpenMP.cpp +++ lib/Sema/SemaOpenMP.cpp @@ -2724,7 +2724,6 @@ SourceLocation EndLoc) { Expr *ValExpr = NumThreads; if (!NumThreads->isValueDependent() && !NumThreads->isTypeDependent() && - !NumThreads->isInstantiationDependent() && !NumThreads->containsUnexpandedParameterPack()) { SourceLocation NumThreadsLoc = NumThreads->getLocStart(); ExprResult Val = Index: test/OpenMP/parallel_num_threads_codegen.cpp =================================================================== --- test/OpenMP/parallel_num_threads_codegen.cpp +++ test/OpenMP/parallel_num_threads_codegen.cpp @@ -0,0 +1,84 @@ +// RUN: %clang_cc1 -verify -fopenmp=libiomp5 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +typedef __INTPTR_TYPE__ intptr_t; + +// CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } +// CHECK-DAG: [[S_TY:%.+]] = type { [[INTPTR_T_TY:i[0-9]+]], [[INTPTR_T_TY]], [[INTPTR_T_TY]] } +// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" +// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8]* [[STR]], i32 0, i32 0) } + +void foo(); + +struct S { + intptr_t a, b, c; + S(intptr_t a) : a(a) {} + operator char() { return a; } + ~S() {} +}; + +template +int tmain() { +#pragma omp parallel num_threads(C) + foo(); +#pragma omp parallel num_threads(T(23)) + foo(); + return 0; +} + +int main() { + S s(0); + char a = s; +#pragma omp parallel num_threads(2) + foo(); +#pragma omp parallel num_threads(a) + foo(); + return a + tmain() + tmain(); +} + +// CHECK: define [[INT_TY:i[0-9]+]] @main( +// CHECK-DAG: [[S_ADDR:%.+]] = alloca [[S_TY]] +// CHECK-DAG: [[A_ADDR:%.+]] = alloca i8 +// CHECK-DAG: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEF_LOC_2]]) +// CHECK-DAG: call void [[S_TY_CONSTR:@.+]]([[S_TY]]* [[S_ADDR]], [[INTPTR_T_TY]] 0) +// CHECK: [[S_CHAR_OP:%.+]] = invoke{{.*}} i8 [[S_TY_CHAR_OP:@.+]]([[S_TY]]* [[S_ADDR]]) +// CHECK: store i8 [[S_CHAR_OP]], i8* [[A_ADDR]] +// CHECK: call void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 2) +// CHECK: call void {{.*}}* @__kmpc_fork_call( +// CHECK: [[A_VAL:%.+]] = load i8* [[A_ADDR]] +// CHECK: [[RES:%.+]] = sext i8 [[A_VAL]] to i32 +// CHECK: call void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 [[RES]]) +// CHECK: call void {{.*}}* @__kmpc_fork_call( +// CHECK: invoke [[INT_TY]] [[TMAIN_CHAR_5:@.+]]() +// CHECK: invoke [[INT_TY]] [[TMAIN_S_1:@.+]]() +// CHECK: call void [[S_TY_DESTR:@.+]]([[S_TY]]* [[S_ADDR]]) +// CHECK: ret [[INT_TY]] +// CHECK: } + +// CHECK: define{{.*}} [[INT_TY]] [[TMAIN_CHAR_5]]() +// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEF_LOC_2]]) +// CHECK: call void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 5) +// CHECK: call void {{.*}}* @__kmpc_fork_call( +// CHECK: call void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 23) +// CHECK: call void {{.*}}* @__kmpc_fork_call( +// CHECK: ret [[INT_TY]] 0 +// CHECK-NEXT: } + +// CHECK: define{{.*}} [[INT_TY]] [[TMAIN_S_1]]() +// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEF_LOC_2]]) +// CHECK: call void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 1) +// CHECK: call void {{.*}}* @__kmpc_fork_call( +// CHECK: call void [[S_TY_CONSTR]]([[S_TY]]* [[S_TEMP:%.+]], [[INTPTR_T_TY]] 23) +// CHECK: [[S_CHAR_OP:%.+]] = invoke{{.*}} i8 [[S_TY_CHAR_OP]]([[S_TY]]* [[S_TEMP]]) +// CHECK: [[RES:%.+]] = sext i8 [[S_CHAR_OP]] to i32 +// CHECK: call void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 [[RES]]) +// CHECK: call void [[S_TY_DESTR]]([[S_TY]]* [[S_TEMP]]) +// CHECK: call void {{.*}}* @__kmpc_fork_call( +// CHECK: ret [[INT_TY]] 0 +// CHECK: } + +#endif