[OPENMP] Codegen for the 'num_threads' clause of '#pragma omp parallel'.

Summary of what this patch contains:
  * CGOpenMPRuntime.h/.cpp: new runtime entry OMPRTL__kmpc_push_num_threads,
    its function type (void(ident_t *, kmp_int32, kmp_int32)), and the new
    virtual EmitOMPNumThreadsClause(), which casts the evaluated clause value
    to a signed 32-bit integer and calls __kmpc_push_num_threads.
  * CGStmtOpenMP.cpp: new file-local helper EmitOMPParallelCall() that, when a
    num_threads clause is present, evaluates it inside its own cleanup scope
    and emits the push call before delegating to the runtime's
    EmitOMPParallelCall(). Both the "if-clause then" path and the
    unconditional path of EmitOMPParallelDirective() now use the helper; the
    serial ("else") path is unchanged.
  * SemaOpenMP.cpp: drops the isInstantiationDependent() condition from the
    num_threads clause check.
  * test/OpenMP/parallel_num_threads_codegen.cpp: new FileCheck test covering
    constant, variable, template-constant and class-conversion arguments.

Index: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h
===================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h
@@ -80,7 +80,10 @@
     OMPRTL__kmpc_serialized_parallel,
     // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
     // global_tid);
-    OMPRTL__kmpc_end_serialized_parallel
+    OMPRTL__kmpc_end_serialized_parallel,
+    // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
+    // kmp_int32 num_threads);
+    OMPRTL__kmpc_push_num_threads
   };
 
 private:
@@ -250,6 +253,14 @@
   ///
   virtual void EmitOMPBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   OpenMPLocationFlags Flags);
+
+  /// \brief Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
+  /// global_tid, kmp_int32 num_threads) to generate code for 'num_threads'
+  /// clause.
+  /// \param NumThreads An integer value of threads.
+  virtual void EmitOMPNumThreadsClause(CodeGenFunction &CGF,
+                                       llvm::Value *NumThreads,
+                                       SourceLocation Loc);
 };
 } // namespace CodeGen
 } // namespace clang
Index: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp
===================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -296,6 +296,16 @@
     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
     break;
   }
+  case OMPRTL__kmpc_push_num_threads: {
+    // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
+    // kmp_int32 num_threads)
+    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
+                                CGM.Int32Ty};
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
+    break;
+  }
   case OMPRTL__kmpc_serialized_parallel: {
     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
     // global_tid);
@@ -431,3 +441,15 @@
   CGF.EmitRuntimeCall(RTLFn, Args);
 }
 
+void CGOpenMPRuntime::EmitOMPNumThreadsClause(CodeGenFunction &CGF,
+                                              llvm::Value *NumThreads,
+                                              SourceLocation Loc) {
+  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
+  llvm::Value *Args[] = {
+      EmitOpenMPUpdateLocation(CGF, Loc), GetOpenMPThreadID(CGF, Loc),
+      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
+  llvm::Constant *RTLFn = CGF.CGM.getOpenMPRuntime().CreateRuntimeFunction(
+      CGOpenMPRuntime::OMPRTL__kmpc_push_num_threads);
+  CGF.EmitRuntimeCall(RTLFn, Args);
+}
+
Index: cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp
+++ cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp
@@ -183,6 +183,23 @@
   }
 }
 
+/// \brief Emits code for OpenMP parallel directive in the parallel region.
+static void EmitOMPParallelCall(CodeGenFunction &CGF,
+                                const OMPParallelDirective &S,
+                                llvm::Value *OutlinedFn,
+                                llvm::Value *CapturedStruct) {
+  if (auto C = S.getSingleClause(/*K*/ OMPC_num_threads)) {
+    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
+    auto NumThreadsClause = cast<OMPNumThreadsClause>(C);
+    auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
+                                         /*IgnoreResultAssign*/ true);
+    CGF.CGM.getOpenMPRuntime().EmitOMPNumThreadsClause(
+        CGF, NumThreads, NumThreadsClause->getLocStart());
+  }
+  CGF.CGM.getOpenMPRuntime().EmitOMPParallelCall(CGF, S.getLocStart(),
+                                                 OutlinedFn, CapturedStruct);
+}
+
 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
@@ -192,16 +209,13 @@
     auto Cond = cast<OMPIfClause>(C)->getCondition();
     EmitOMPIfClause(*this, Cond, [&](bool ThenBlock) {
       if (ThenBlock)
-        CGM.getOpenMPRuntime().EmitOMPParallelCall(*this, S.getLocStart(),
-                                                   OutlinedFn, CapturedStruct);
+        EmitOMPParallelCall(*this, S, OutlinedFn, CapturedStruct);
       else
         CGM.getOpenMPRuntime().EmitOMPSerialCall(*this, S.getLocStart(),
                                                  OutlinedFn, CapturedStruct);
     });
-  } else {
-    CGM.getOpenMPRuntime().EmitOMPParallelCall(*this, S.getLocStart(),
-                                               OutlinedFn, CapturedStruct);
-  }
+  } else
+    EmitOMPParallelCall(*this, S, OutlinedFn, CapturedStruct);
 }
 
 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &S,
Index: cfe/trunk/lib/Sema/SemaOpenMP.cpp
===================================================================
--- cfe/trunk/lib/Sema/SemaOpenMP.cpp
+++ cfe/trunk/lib/Sema/SemaOpenMP.cpp
@@ -3439,7 +3439,6 @@
                                              SourceLocation EndLoc) {
   Expr *ValExpr = NumThreads;
   if (!NumThreads->isValueDependent() && !NumThreads->isTypeDependent() &&
-      !NumThreads->isInstantiationDependent() &&
       !NumThreads->containsUnexpandedParameterPack()) {
     SourceLocation NumThreadsLoc = NumThreads->getLocStart();
     ExprResult Val =
Index: cfe/trunk/test/OpenMP/parallel_num_threads_codegen.cpp
===================================================================
--- cfe/trunk/test/OpenMP/parallel_num_threads_codegen.cpp
+++ cfe/trunk/test/OpenMP/parallel_num_threads_codegen.cpp
@@ -0,0 +1,84 @@
+// RUN: %clang_cc1 -verify -fopenmp=libiomp5 -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+typedef __INTPTR_TYPE__ intptr_t;
+
+// CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* }
+// CHECK-DAG: [[S_TY:%.+]] = type { [[INTPTR_T_TY:i[0-9]+]], [[INTPTR_T_TY]], [[INTPTR_T_TY]] }
+// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
+// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8]* [[STR]], i32 0, i32 0) }
+
+void foo();
+
+struct S {
+  intptr_t a, b, c;
+  S(intptr_t a) : a(a) {}
+  operator char() { return a; }
+  ~S() {}
+};
+
+template <typename T, int C>
+int tmain() {
+#pragma omp parallel num_threads(C)
+  foo();
+#pragma omp parallel num_threads(T(23))
+  foo();
+  return 0;
+}
+
+int main() {
+  S s(0);
+  char a = s;
+#pragma omp parallel num_threads(2)
+  foo();
+#pragma omp parallel num_threads(a)
+  foo();
+  return a + tmain<char, 5>() + tmain<S, 1>();
+}
+
+// CHECK-LABEL: define {{.*}}i{{[0-9]+}} @main()
+// CHECK-DAG:   [[S_ADDR:%.+]] = alloca [[S_TY]]
+// CHECK-DAG:   [[A_ADDR:%.+]] = alloca i8
+// CHECK-DAG:   [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEF_LOC_2]])
+// CHECK-DAG:   call void [[S_TY_CONSTR:@.+]]([[S_TY]]* [[S_ADDR]], [[INTPTR_T_TY]] 0)
+// CHECK:       [[S_CHAR_OP:%.+]] = invoke{{.*}} i8 [[S_TY_CHAR_OP:@.+]]([[S_TY]]* [[S_ADDR]])
+// CHECK:       store i8 [[S_CHAR_OP]], i8* [[A_ADDR]]
+// CHECK:       call void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 2)
+// CHECK:       call void {{.*}}* @__kmpc_fork_call(
+// CHECK:       [[A_VAL:%.+]] = load i8* [[A_ADDR]]
+// CHECK:       [[RES:%.+]] = sext i8 [[A_VAL]] to i32
+// CHECK:       call void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 [[RES]])
+// CHECK:       call void {{.*}}* @__kmpc_fork_call(
+// CHECK:       invoke [[INT_TY:i[0-9]+]] [[TMAIN_CHAR_5:@.+]]()
+// CHECK:       invoke [[INT_TY]] [[TMAIN_S_1:@.+]]()
+// CHECK:       call void [[S_TY_DESTR:@.+]]([[S_TY]]* [[S_ADDR]])
+// CHECK:       ret [[INT_TY]]
+// CHECK:       }
+
+// CHECK:       define{{.*}} [[INT_TY]] [[TMAIN_CHAR_5]]()
+// CHECK:       [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEF_LOC_2]])
+// CHECK:       call void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 5)
+// CHECK:       call void {{.*}}* @__kmpc_fork_call(
+// CHECK:       call void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 23)
+// CHECK:       call void {{.*}}* @__kmpc_fork_call(
+// CHECK:       ret [[INT_TY]] 0
+// CHECK-NEXT:  }
+
+// CHECK:       define{{.*}} [[INT_TY]] [[TMAIN_S_1]]()
+// CHECK:       [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEF_LOC_2]])
+// CHECK:       call void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 1)
+// CHECK:       call void {{.*}}* @__kmpc_fork_call(
+// CHECK:       call void [[S_TY_CONSTR]]([[S_TY]]* [[S_TEMP:%.+]], [[INTPTR_T_TY]] 23)
+// CHECK:       [[S_CHAR_OP:%.+]] = invoke{{.*}} i8 [[S_TY_CHAR_OP]]([[S_TY]]* [[S_TEMP]])
+// CHECK:       [[RES:%.+]] = sext {{.*}}i8 [[S_CHAR_OP]] to i32
+// CHECK:       call void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 [[RES]])
+// CHECK:       call void [[S_TY_DESTR]]([[S_TY]]* [[S_TEMP]])
+// CHECK:       call void {{.*}}* @__kmpc_fork_call(
+// CHECK:       ret [[INT_TY]] 0
+// CHECK:       }
+
+#endif