[OPENMP] Codegen for the 'num_threads' clause of '#pragma omp parallel'.

Summary of what this patch does (one entry per file below):
  * StmtOpenMP.h / Stmt.cpp: add OMPExecutableDirective::getSingleClause(K),
    which returns the clause of kind K, or nullptr when there is none; it
    asserts that no second clause of that kind exists.
  * CGOpenMPRuntime.h / .cpp: add the OMPRTL__kmpc_push_num_threads runtime
    entry, declared as
      void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                                   kmp_int32 num_threads);
  * CGStmtOpenMP.cpp: when a num_threads clause is present, emit a call to
    __kmpc_push_num_threads before the __kmpc_fork_call is built. The clause
    expression is evaluated inside its own RunCleanupsScope.
  * SemaOpenMP.cpp: implicitly convert the num_threads expression to a signed
    32-bit integer type for the runtime call.
  * test/OpenMP/parallel_num_threads_codegen.cpp: new FileCheck test.

NOTE(review): this text was recovered from a whitespace-collapsed copy of the
patch. Leading indentation of context/added lines is reconstructed, and the
template argument lists that had been stripped were restored; confirm against
the tree before applying.

Index: include/clang/AST/StmtOpenMP.h
===================================================================
--- include/clang/AST/StmtOpenMP.h
+++ include/clang/AST/StmtOpenMP.h
@@ -128,6 +128,10 @@
     operator bool() { return Current != End; }
   };
 
+  /// \brief Gets single clause of the specified kind \a K associated with the
+  /// current directive iff there is only one clause of this kind.
+  const OMPClause *getSingleClause(OpenMPClauseKind K) const;
+
   /// \brief Returns starting location of directive kind.
   SourceLocation getLocStart() const { return StartLoc; }
   /// \brief Returns ending location of directive.
Index: lib/AST/Stmt.cpp
===================================================================
--- lib/AST/Stmt.cpp
+++ lib/AST/Stmt.cpp
@@ -1384,12 +1384,24 @@
   return new (Mem) OMPFlushClause(N);
 }
 
+const OMPClause *
+OMPExecutableDirective::getSingleClause(OpenMPClauseKind K) const {
+  auto ClauseFilter =
+      [=](const OMPClause *C) -> bool { return C->getClauseKind() == K; };
+  OMPExecutableDirective::filtered_clause_iterator<decltype(ClauseFilter)> I(
+      clauses(), ClauseFilter);
+
+  if (I) {
+    auto PrevI = I;
+    assert(!++I && "There are at least 2 clauses of the specified kind");
+    return *PrevI;
+  }
+  return nullptr;
+}
+
 OMPParallelDirective *OMPParallelDirective::Create(
-    const ASTContext &C,
-    SourceLocation StartLoc,
-    SourceLocation EndLoc,
-    ArrayRef<OMPClause *> Clauses,
-    Stmt *AssociatedStmt) {
+    const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
+    ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt) {
   unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPParallelDirective),
                                            llvm::alignOf<OMPClause *>());
   void *Mem = C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() +
Index: lib/CodeGen/CGOpenMPRuntime.h
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.h
+++ lib/CodeGen/CGOpenMPRuntime.h
@@ -67,8 +67,11 @@
     // Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
     // microtask, ...);
     OMPRTL__kmpc_fork_call,
-    // Call to kmp_int32 kmpc_global_thread_num(ident_t *loc);
-    OMPRTL__kmpc_global_thread_num
+    // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
+    OMPRTL__kmpc_global_thread_num,
+    // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
+    // kmp_int32 num_threads);
+    OMPRTL__kmpc_push_num_threads
   };
 
 private:
Index: lib/CodeGen/CGOpenMPRuntime.cpp
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.cpp
+++ lib/CodeGen/CGOpenMPRuntime.cpp
@@ -196,6 +196,16 @@
     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
     break;
   }
+  case OMPRTL__kmpc_push_num_threads: {
+    // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
+    // kmp_int32 num_threads)
+    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
+                                CGM.Int32Ty};
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
+    break;
+  }
   }
   return RTLFn;
 }
Index: lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- lib/CodeGen/CGStmtOpenMP.cpp
+++ lib/CodeGen/CGStmtOpenMP.cpp
@@ -23,6 +23,22 @@
 //                              OpenMP Directive Emission
 //===----------------------------------------------------------------------===//
 
+static void EmitOMPNumThreadsClause(CodeGenFunction &CGF,
+                                    const OMPNumThreadsClause *C) {
+  CodeGenFunction::RunCleanupsScope NumThreads(CGF);
+
+  // __kmpc_push_num_threads(&loc, global_tid, num_threads);
+  llvm::Value *Args[] = {
+      CGF.CGM.getOpenMPRuntime()
+          .EmitOpenMPUpdateLocation(CGF, C->getLocStart()),
+      CGF.CGM.getOpenMPRuntime()
+          .GetOpenMPGlobalThreadNum(CGF, C->getLocStart()),
+      CGF.EmitScalarExpr(C->getNumThreads(), /*IgnoreResultAssign*/ true)};
+  llvm::Constant *RTLFn = CGF.CGM.getOpenMPRuntime().CreateRuntimeFunction(
+      CGOpenMPRuntime::OMPRTL__kmpc_push_num_threads);
+  CGF.EmitRuntimeCall(RTLFn, Args);
+}
+
 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
   const CapturedStmt *CS = cast<CapturedStmt>(S.getAssociatedStmt());
   llvm::Value *CapturedStruct = GenerateCapturedStmtArgument(*CS);
@@ -34,6 +50,8 @@
     CGF.CapturedStmtInfo = &CGInfo;
     OutlinedFn = CGF.GenerateCapturedStmtFunction(*CS);
   }
+  if (auto C = S.getSingleClause(/*K*/ OMPC_num_threads))
+    EmitOMPNumThreadsClause(*this, cast<OMPNumThreadsClause>(C));
 
   // Build call __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/)
   llvm::Value *Args[] = {
Index: lib/Sema/SemaOpenMP.cpp
===================================================================
--- lib/Sema/SemaOpenMP.cpp
+++ lib/Sema/SemaOpenMP.cpp
@@ -2609,6 +2609,11 @@
           << "num_threads" << NumThreads->getSourceRange();
       return nullptr;
     }
+    // Convert to int32 for runtime call.
+    ValExpr = PerformImplicitConversion(
+        ValExpr,
+        Context.getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true),
+        /*Action*/ AA_Casting).get();
   }
 
   return new (Context)
Index: test/OpenMP/parallel_num_threads_codegen.cpp
===================================================================
--- test/OpenMP/parallel_num_threads_codegen.cpp
+++ test/OpenMP/parallel_num_threads_codegen.cpp
@@ -0,0 +1,84 @@
+// RUN: %clang_cc1 -verify -fopenmp=libiomp5 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+typedef __INTPTR_TYPE__ intptr_t;
+
+// CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* }
+// CHECK-DAG: [[S_TY:%.+]] = type { [[INTPTR_T_TY:i[0-9]+]], [[INTPTR_T_TY]], [[INTPTR_T_TY]] }
+// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
+// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8]* [[STR]], i32 0, i32 0) }
+
+void foo();
+
+struct S {
+  intptr_t a, b, c;
+  S(intptr_t a) : a(a) {}
+  operator char() { return a; }
+  ~S() {}
+};
+
+template <typename T, int C>
+int tmain() {
+#pragma omp parallel num_threads(C)
+  foo();
+#pragma omp parallel num_threads(T(23))
+  foo();
+  return 0;
+}
+
+int main() {
+  S s(0);
+  char a = s;
+#pragma omp parallel num_threads(2)
+  foo();
+#pragma omp parallel num_threads(a)
+  foo();
+  return a + tmain<char, 5>() + tmain<S, 1>();
+}
+
+// CHECK: define [[INT_TY:i[0-9]+]] @main(
+// CHECK-DAG: [[S_ADDR:%.+]] = alloca [[S_TY]]
+// CHECK-DAG: [[A_ADDR:%.+]] = alloca i8
+// CHECK-DAG: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEF_LOC_2]])
+// CHECK-DAG: call void [[S_TY_CONSTR:@.+]]([[S_TY]]* [[S_ADDR]], [[INTPTR_T_TY]] 0)
+// CHECK: [[S_CHAR_OP:%.+]] = invoke{{.*}} i8 [[S_TY_CHAR_OP:@.+]]([[S_TY]]* [[S_ADDR]])
+// CHECK: store i8 [[S_CHAR_OP]], i8* [[A_ADDR]]
+// CHECK: call void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 2)
+// CHECK: call void {{.*}}* @__kmpc_fork_call(
+// CHECK: [[A_VAL:%.+]] = load i8* [[A_ADDR]]
+// CHECK: [[RES:%.+]] = sext i8 [[A_VAL]] to i32
+// CHECK: call void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 [[RES]])
+// CHECK: call void {{.*}}* @__kmpc_fork_call(
+// CHECK: invoke [[INT_TY]] [[TMAIN_CHAR_5:@.+]]()
+// CHECK: invoke [[INT_TY]] [[TMAIN_S_1:@.+]]()
+// CHECK: call void [[S_TY_DESTR:@.+]]([[S_TY]]* [[S_ADDR]])
+// CHECK: ret [[INT_TY]]
+// CHECK: }
+
+// CHECK: define{{.*}} [[INT_TY]] [[TMAIN_CHAR_5]]()
+// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEF_LOC_2]])
+// CHECK: call void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 5)
+// CHECK: call void {{.*}}* @__kmpc_fork_call(
+// CHECK: call void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 23)
+// CHECK: call void {{.*}}* @__kmpc_fork_call(
+// CHECK: ret [[INT_TY]] 0
+// CHECK-NEXT: }
+
+// CHECK: define{{.*}} [[INT_TY]] [[TMAIN_S_1]]()
+// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEF_LOC_2]])
+// CHECK: call void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 1)
+// CHECK: call void {{.*}}* @__kmpc_fork_call(
+// CHECK: call void [[S_TY_CONSTR]]([[S_TY]]* [[S_TEMP:%.+]], [[INTPTR_T_TY]] 23)
+// CHECK: [[S_CHAR_OP:%.+]] = invoke{{.*}} i8 [[S_TY_CHAR_OP]]([[S_TY]]* [[S_TEMP]])
+// CHECK: [[RES:%.+]] = sext i8 [[S_CHAR_OP]] to i32
+// CHECK: call void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 [[RES]])
+// CHECK: call void [[S_TY_DESTR]]([[S_TY]]* [[S_TEMP]])
+// CHECK: call void {{.*}}* @__kmpc_fork_call(
+// CHECK: ret [[INT_TY]] 0
+// CHECK: }
+
+#endif