diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -6429,10 +6429,8 @@ } if (isOpenMPSimdDirective(Dir->getDirectiveKind())) return CGF.Builder.getInt32(1); - return DefaultThreadLimitVal; } - return DefaultThreadLimitVal ? DefaultThreadLimitVal - : CGF.Builder.getInt32(0); + return DefaultThreadLimitVal; } const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective( @@ -6575,12 +6573,14 @@ return NumThreads; const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( CGF.getContext(), CS->getCapturedStmt()); + // TODO: The standard is not clear how to resolve two thread limit clauses, + // let's pick the teams one if it's present, otherwise the target one. + const auto *ThreadLimitClause = D.getSingleClause(); if (const auto *Dir = dyn_cast_or_null(Child)) { - if (Dir->hasClausesOfKind()) { + if (const auto *TLC = Dir->getSingleClause()) { + ThreadLimitClause = TLC; CGOpenMPInnerExprInfo CGInfo(CGF, *CS); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); - const auto *ThreadLimitClause = - Dir->getSingleClause(); CodeGenFunction::LexicalScope Scope( CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); if (const auto *PreInit = @@ -6595,11 +6595,15 @@ } } } - llvm::Value *ThreadLimit = CGF.EmitScalarExpr( - ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); - ThreadLimitVal = - Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); } + } + if (ThreadLimitClause) { + llvm::Value *ThreadLimit = CGF.EmitScalarExpr( + ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); + ThreadLimitVal = + Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); + } + if (const auto *Dir = dyn_cast_or_null(Child)) { if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { CS = Dir->getInnermostCapturedStmt(); @@ -6650,7 +6654,10 @@ ThreadLimitVal = Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); } - return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal); + if (llvm::Value *NumThreads = + getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal)) + return NumThreads; + return Bld.getInt32(0); case OMPD_target_parallel: case OMPD_target_parallel_for: case OMPD_target_parallel_for_simd: diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -15635,6 +15635,7 @@ break; case OMPC_thread_limit: switch (DKind) { + case OMPD_target: case OMPD_target_teams: case OMPD_target_teams_distribute: case OMPD_target_teams_distribute_simd: @@ -15676,7 +15677,6 @@ case OMPD_parallel_for: case OMPD_parallel_for_simd: case OMPD_parallel_loop: - case OMPD_target: case OMPD_target_simd: case OMPD_target_parallel: case OMPD_target_parallel_for: diff --git a/clang/test/OpenMP/target_ast_print.cpp b/clang/test/OpenMP/target_ast_print.cpp --- a/clang/test/OpenMP/target_ast_print.cpp +++ b/clang/test/OpenMP/target_ast_print.cpp @@ -1108,6 +1108,8 @@ foo(); #pragma omp target defaultmap(present: pointer) foo(); + #pragma omp target thread_limit(C) + foo(); return 0; } @@ -1119,6 +1121,8 @@ // OMP51-NEXT: foo() // OMP51-NEXT: #pragma omp target defaultmap(present: pointer) // OMP51-NEXT: foo() +// OMP51-NEXT: #pragma omp target thread_limit(C) +// OMP51-NEXT: foo() // OMP51-LABEL: int main(int argc, char **argv) { int main (int argc, char **argv) { diff --git a/clang/test/OpenMP/target_codegen.cpp b/clang/test/OpenMP/target_codegen.cpp --- a/clang/test/OpenMP/target_codegen.cpp +++ b/clang/test/OpenMP/target_codegen.cpp @@ -13,6 +13,13 @@ // RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s // RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix OMP50 +// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-version=51 -D_DOMP51 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix OMP51 +// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -fopenmp-version=51 -D_DOMP51 -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -fopenmp-version=51 -D_DOMP51 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix OMP51 +// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-version=51 -D_DOMP51 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix OMP51 +// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -fopenmp-version=51 -D_DOMP51 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -fopenmp-version=51 -D_DOMP51 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix OMP51 + // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s // RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s @@ -902,6 +909,52 @@ } }; +#ifdef _DOMP51 +void thread_limit_target(int TargetTL, int TeamsTL) { + +#pragma omp target +{} +// OMP51: call i32 @__tgt_target_kernel({{.*}}, i64 -1, i32 -1, i32 0, + +#pragma omp target +#pragma omp teams +{} +// OMP51: call i32 @__tgt_target_kernel({{.*}}, i64 -1, i32 0, i32 0, + +#pragma omp target thread_limit(TargetTL) +{} +// OMP51: [[TL:%.*]] = load {{.*}} %TargetTL.addr +// OMP51: store {{.*}} [[TL]], {{.*}} [[CEA:%.*]] +// OMP51: load {{.*}} [[CEA]] +// OMP51: [[CE:%.*]] = load {{.*}} [[CEA]] +// OMP51: call i32 @__tgt_target_kernel({{.*}}, i64 -1, i32 -1, i32 [[CE]], + +#pragma omp target thread_limit(TargetTL) +#pragma omp teams +{} +// OMP51: [[TL:%.*]] = load {{.*}} %TargetTL.addr +// OMP51: store {{.*}} [[TL]], {{.*}} [[CEA:%.*]] +// OMP51: load {{.*}} [[CEA]] +// OMP51: [[CE:%.*]] = load {{.*}} [[CEA]] +// OMP51: call i32 @__tgt_target_kernel({{.*}}, i64 -1, i32 0, i32 [[CE]], + +#pragma omp target +#pragma omp teams thread_limit(TeamsTL) +{} +// OMP51: load {{.*}} %TeamsTL.addr +// OMP51: [[TeamsL:%.*]] = load {{.*}} %TeamsTL.addr +// OMP51: call i32 @__tgt_target_kernel({{.*}}, i64 -1, i32 0, i32 [[TeamsL]], + +#pragma omp target thread_limit(TargetTL) +#pragma omp teams thread_limit(TeamsTL) +{} +// OMP51: load {{.*}} %TeamsTL.addr +// OMP51: [[TeamsL:%.*]] = load {{.*}} %TeamsTL.addr +// OMP51: call i32 @__tgt_target_kernel({{.*}}, i64 -1, i32 0, i32 [[TeamsL]], + +} +#endif + // CHECK: define internal void @.omp_offloading.requires_reg() // CHECK: call void @__tgt_register_requires(i64 1) // CHECK: ret void