Index: include/clang/Basic/OpenMPKinds.h =================================================================== --- include/clang/Basic/OpenMPKinds.h +++ include/clang/Basic/OpenMPKinds.h @@ -109,6 +109,12 @@ /// parallel', otherwise - false. bool isOpenMPParallelDirective(OpenMPDirectiveKind DKind); +/// \brief Checks if the specified directive is a target-kind directive. +/// \param DKind Specified directive. +/// \return true - the directive is a target-like directive like 'omp target', +/// otherwise - false. +bool isOpenMPTargetDirective(OpenMPDirectiveKind DKind); + /// \brief Checks if the specified directive is a teams-kind directive. /// \param DKind Specified directive. /// \return true - the directive is a teams-like directive like 'omp teams', Index: include/clang/Sema/Sema.h =================================================================== --- include/clang/Sema/Sema.h +++ include/clang/Sema/Sema.h @@ -7720,6 +7720,11 @@ /// is performed. bool isOpenMPPrivateVar(VarDecl *VD, unsigned Level); + /// \brief Check if the specified variable is captured by 'target' directive. + /// \param Level Relative level of nested OpenMP construct for that the check + /// is performed. + bool isOpenMPTargetCapturedVar(VarDecl *VD, unsigned Level); + ExprResult PerformOpenMPImplicitIntegerConversion(SourceLocation OpLoc, Expr *Op); /// \brief Called on start of new data sharing attribute block. Index: lib/Basic/OpenMPKinds.cpp =================================================================== --- lib/Basic/OpenMPKinds.cpp +++ lib/Basic/OpenMPKinds.cpp @@ -402,6 +402,10 @@ DKind == OMPD_parallel_sections; // TODO add next directives. } +bool clang::isOpenMPTargetDirective(OpenMPDirectiveKind DKind) { + return DKind == OMPD_target; // TODO add next directives. +} + bool clang::isOpenMPTeamsDirective(OpenMPDirectiveKind DKind) { return DKind == OMPD_teams; // TODO add next directives. } Index: lib/Sema/SemaExpr.cpp =================================================================== --- lib/Sema/SemaExpr.cpp +++ lib/Sema/SemaExpr.cpp @@ -13152,10 +13152,18 @@ if (getLangOpts().OpenMP) { if (auto *RSI = dyn_cast(CSI)) { // OpenMP private variables should not be captured in outer scope, so - // just break here. + // just break here. Similarly, global variables that are captured in a + // target region should not be captured outside the scope of the region. if (RSI->CapRegionKind == CR_OpenMP) { - if (isOpenMPPrivateVar(Var, OpenMPLevel)) { - Nested = true; + auto isTargetCap = isOpenMPTargetCapturedVar(Var, OpenMPLevel); + // When we detect target captures we are looking from inside the + // target region, therefore we need to propagate the capture from the + // enclosing region. Therefore, the capture is not initially nested. + if (isTargetCap) + FunctionScopesIndex--; + + if (isTargetCap || isOpenMPPrivateVar(Var, OpenMPLevel)) { + Nested = !isTargetCap; DeclRefType = DeclRefType.getUnqualifiedType(); CaptureType = Context.getLValueReferenceType(DeclRefType); break; Index: lib/Sema/SemaOpenMP.cpp =================================================================== --- lib/Sema/SemaOpenMP.cpp +++ lib/Sema/SemaOpenMP.cpp @@ -190,6 +190,13 @@ bool hasExplicitDSA(VarDecl *D, const llvm::function_ref &CPred, unsigned Level); + + /// \brief Returns true if the directive at level \Level matches in the + /// specified \a DPred predicate. + bool hasExplicitDirective( + const llvm::function_ref &DPred, + unsigned Level); + /// \brief Finds a directive which matches specified \a DPred predicate. template bool hasDirective(NamedDirectivesPredicate DPred, bool FromParent); @@ -640,6 +647,19 @@ CPred(StartI->SharingMap[D].Attributes); } +bool DSAStackTy::hasExplicitDirective( + const llvm::function_ref &DPred, + unsigned Level) { + if (isClauseParsingMode()) + ++Level; + auto StartI = Stack.rbegin(); + auto EndI = std::prev(Stack.rend()); + if (std::distance(StartI, EndI) <= (int)Level) + return false; + std::advance(StartI, Level); + return DPred(StartI->Directive); +} + template bool DSAStackTy::hasDirective(NamedDirectivesPredicate DPred, bool FromParent) { auto StartI = std::next(Stack.rbegin()); @@ -663,6 +683,30 @@ bool Sema::IsOpenMPCapturedVar(VarDecl *VD) { assert(LangOpts.OpenMP && "OpenMP is not allowed"); VD = VD->getCanonicalDecl(); + + // If we are attempting to capture a global variable in a directive with + // 'target' we return true so that this global is also mapped to the device. + // + // FIXME: If the declaration is enclosed in a 'declare target' directive, + // then it should not be captured. Therefore, an extra check has to be + // inserted here once support for 'declare target' is added. + // + if (!VD->hasLocalStorage()) { + if (DSAStack->getCurrentDirective() == OMPD_target && + !DSAStack->isClauseParsingMode()) { + return true; + } + if (DSAStack->getCurScope() && + DSAStack->hasDirective( + [](OpenMPDirectiveKind K, const DeclarationNameInfo &DNI, + SourceLocation Loc) -> bool { + return isOpenMPTargetDirective(K); + }, + false)) { + return true; + } + } + if (DSAStack->getCurrentDirective() != OMPD_unknown && (!DSAStack->isClauseParsingMode() || DSAStack->getParentDirective() != OMPD_unknown)) { @@ -687,6 +731,14 @@ VD, [](OpenMPClauseKind K) -> bool { return K == OMPC_private; }, Level); } +bool Sema::isOpenMPTargetCapturedVar(VarDecl *VD, unsigned Level) { + assert(LangOpts.OpenMP && "OpenMP is not allowed"); + // Return true if the current level is no longer enclosed in a target region. + + return !VD->hasLocalStorage() && + DSAStack->hasExplicitDirective(isOpenMPTargetDirective, Level); +} + void Sema::DestroyDataSharingAttributesStack() { delete DSAStack; } void Sema::StartOpenMPDSABlock(OpenMPDirectiveKind DKind, Index: test/OpenMP/target_codegen_global_capture.cpp =================================================================== --- /dev/null +++ test/OpenMP/target_codegen_global_capture.cpp @@ -0,0 +1,173 @@ +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + + +// CHECK-DAG: [[GA:@.+]] = global double 1.000000e+00 +// CHECK-DAG: [[GB:@.+]] = global double 2.000000e+00 +// CHECK-DAG: [[GC:@.+]] = global double 3.000000e+00 +// CHECK-DAG: [[GD:@.+]] = global double 4.000000e+00 +// CHECK-DAG: [[FA:@.+]] = internal global float 5.000000e+00 +// CHECK-DAG: [[FB:@.+]] = internal global float 6.000000e+00 +// CHECK-DAG: [[FC:@.+]] = internal global float 7.000000e+00 +// CHECK-DAG: [[FD:@.+]] = internal global float 8.000000e+00 +// CHECK-DAG: [[BA:@.+]] = internal global float 9.000000e+00 +// CHECK-DAG: [[BB:@.+]] = internal global float 1.000000e+01 +// CHECK-DAG: [[BC:@.+]] = internal global float 1.100000e+01 +// CHECK-DAG: [[BD:@.+]] = internal global float 1.200000e+01 +double Ga = 1.0; +double Gb = 2.0; +double Gc = 3.0; +double Gd = 4.0; + +// CHECK: define {{.*}} @{{.*}}foo{{.*}}( +// CHECK-SAME: i16 {{[^,]*}}[[A:%[^,]+]], +// CHECK-SAME: i16 {{[^,]*}}[[B:%[^,]+]], +// CHECK-SAME: i16 {{[^,]*}}[[C:%[^,]+]], +// CHECK-SAME: i16 {{[^,]*}}[[D:%[^,]+]]) +// CHECK: [[LA:%.+]] = alloca i16 +// CHECK: [[LB:%.+]] = alloca i16 +// CHECK: [[LC:%.+]] = alloca i16 +// CHECK: [[LD:%.+]] = alloca i16 +int foo(short a, short b, short c, short d){ + static float Sa = 5.0; + static float Sb = 6.0; + static float Sc = 7.0; + static float Sd = 8.0; + + // CHECK-DAG: [[REFB:%.+]] = bitcast i16* [[LB]] to i8* + // CHECK-DAG: store i8* [[REFB]], i8** [[GEPB:%.+]], align + // CHECK-DAG: [[REFC:%.+]] = bitcast i16* [[LC]] to i8* + // CHECK-DAG: store i8* [[REFC]], i8** [[GEPC:%.+]], align + // CHECK-DAG: [[REFD:%.+]] = bitcast i16* [[LD]] to i8* + // CHECK-DAG: store i8* [[REFD]], i8** [[GEPD:%.+]], align + // CHECK-DAG: store i8* bitcast (double* [[GB]] to i8*), i8** [[GEPGB:%.+]], align + // CHECK-DAG: store i8* bitcast (double* [[GC]] to i8*), i8** [[GEPGC:%.+]], align + // CHECK-DAG: store i8* bitcast (double* [[GD]] to i8*), i8** [[GEPGD:%.+]], align + // CHECK-DAG: store i8* bitcast (float* [[FB]] to i8*), i8** [[GEPFB:%.+]], align + // CHECK-DAG: store i8* bitcast (float* [[FC]] to i8*), i8** [[GEPFC:%.+]], align + // CHECK-DAG: store i8* bitcast (float* [[FD]] to i8*), i8** [[GEPFD:%.+]], align + // CHECK-DAG: [[GEPB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} + // CHECK-DAG: [[GEPC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} + // CHECK-DAG: [[GEPD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} + // CHECK-DAG: [[GEPGB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} + // CHECK-DAG: [[GEPGC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} + // CHECK-DAG: [[GEPGD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} + // CHECK-DAG: [[GEPFB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} + // CHECK-DAG: [[GEPFC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} + // CHECK-DAG: [[GEPFD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} + // CHECK: call i32 @__tgt_target + // CHECK: call void [[OFFLOADF:@.+]]( + // Capture b, Gb, Sb, Gc, c, Sc, d, Gd, Sd + #pragma omp target if(Ga>0.0 && a>0 && Sa>0.0) + { + b += 1; + Gb += 1.0; + Sb += 1.0; + + // CHECK: define internal void [[OFFLOADF]]({{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}) + // The parallel region only uses 3 captures. + // CHECK: call {{.*}}@__kmpc_fork_call(%ident_t* {{.+}}, i32 {{.+}}, void (i32*, i32*, ...)* bitcast ({{.*}}[[PARF:@.+]] to {{.*}}), {{.+}}* %{{.+}}, {{.+}}* %{{.+}}, {{.+}}* %{{.+}}) + // CHECK: call void @.omp_outlined.(i32* %{{.+}}, i32* %{{.+}}, {{.+}}* %{{.+}}, {{.+}}* %{{.+}}, {{.+}}* %{{.+}}) + // Capture d, Gd, Sd, + + // CHECK: define internal void [[PARF]](i32* noalias %{{.*}}, i32* noalias %{{.*}}, + #pragma omp parallel if(Gc>0.0 && c>0 && Sc>0.0) + { + d += 1; + Gd += 1.0; + Sd += 1.0; + } + } + return a + b + c + d + (int)Sa + (int)Sb + (int)Sc + (int)Sd; +} + +// CHECK: define {{.*}} @{{.*}}bar{{.*}}( +// CHECK-SAME: i16 {{[^,]*}}[[A:%[^,]+]], +// CHECK-SAME: i16 {{[^,]*}}[[B:%[^,]+]], +// CHECK-SAME: i16 {{[^,]*}}[[C:%[^,]+]], +// CHECK-SAME: i16 {{[^,]*}}[[D:%[^,]+]]) +// CHECK: [[LA:%.+]] = alloca i16 +// CHECK: [[LB:%.+]] = alloca i16 +// CHECK: [[LC:%.+]] = alloca i16 +// CHECK: [[LD:%.+]] = alloca i16 +int bar(short a, short b, short c, short d){ + static float Sa = 9.0; + static float Sb = 10.0; + static float Sc = 11.0; + static float Sd = 12.0; + + // CHECK: call void {{.*}}@__kmpc_fork_call(%ident_t* {{.+}}, i32 {{.+}}, void (i32*, i32*, ...)* bitcast ({{.*}}[[PARF:@.+]] to {{.*}}), i16* %{{.+}}, i16* %{{.+}}, i16* %{{.+}}, i16* %{{.+}}) + // CHECK: define internal void [[PARF]](i32* noalias %{{.*}}, i32* noalias %{{.*}}, i16* dereferenceable(2) [[A:%.+]], i16* dereferenceable(2) [[B:%.+]], i16* dereferenceable(2) [[C:%.+]], i16* dereferenceable(2) [[D:%.+]]) + // Capture a, b, c, d + #pragma omp parallel + { + // CHECK: [[ADRA:%.+]] = alloca i16*, align + // CHECK: [[ADRB:%.+]] = alloca i16*, align + // CHECK: [[ADRC:%.+]] = alloca i16*, align + // CHECK: [[ADRD:%.+]] = alloca i16*, align + // CHECK: store i16* [[A]], i16** [[ADRA]], align + // CHECK: store i16* [[B]], i16** [[ADRB]], align + // CHECK: store i16* [[C]], i16** [[ADRC]], align + // CHECK: store i16* [[D]], i16** [[ADRD]], align + // CHECK: [[REFA:%.+]] = load i16*, i16** [[ADRA]], + // CHECK: [[REFB:%.+]] = load i16*, i16** [[ADRB]], + // CHECK: [[REFC:%.+]] = load i16*, i16** [[ADRC]], + // CHECK: [[REFD:%.+]] = load i16*, i16** [[ADRD]], + + // CHECK: load float, float* [[BA]] + + // CHECK-DAG: [[CSTB:%.+]] = bitcast i16* [[REFB]] to i8* + // CHECK-DAG: [[CSTC:%.+]] = bitcast i16* [[REFC]] to i8* + // CHECK-DAG: [[CSTD:%.+]] = bitcast i16* [[REFD]] to i8* + // CHECK-DAG: store i8* [[CSTB]], i8** [[GEPB:%.+]], align + // CHECK-DAG: store i8* [[CSTC]], i8** [[GEPC:%.+]], align + // CHECK-DAG: store i8* [[CSTD]], i8** [[GEPD:%.+]], align + // CHECK-DAG: store i8* bitcast (double* [[GB]] to i8*), i8** [[GEPGB:%.+]], align + // CHECK-DAG: store i8* bitcast (double* [[GC]] to i8*), i8** [[GEPGC:%.+]], align + // CHECK-DAG: store i8* bitcast (double* [[GD]] to i8*), i8** [[GEPGD:%.+]], align + // CHECK-DAG: store i8* bitcast (float* [[BB]] to i8*), i8** [[GEPBB:%.+]], align + // CHECK-DAG: store i8* bitcast (float* [[BC]] to i8*), i8** [[GEPBC:%.+]], align + // CHECK-DAG: store i8* bitcast (float* [[BD]] to i8*), i8** [[GEPBD:%.+]], align + + // CHECK-DAG: [[GEPB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} + // CHECK-DAG: [[GEPC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} + // CHECK-DAG: [[GEPD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} + // CHECK-DAG: [[GEPGB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} + // CHECK-DAG: [[GEPGC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} + // CHECK-DAG: [[GEPGD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} + // CHECK-DAG: [[GEPBB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} + // CHECK-DAG: [[GEPBC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} + // CHECK-DAG: [[GEPBD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} + // CHECK: call i32 @__tgt_target + // CHECK: call void [[OFFLOADF:@.+]]( + // Capture b, Gb, Sb, Gc, c, Sc, d, Gd, Sd + #pragma omp target if(Ga>0.0 && a>0 && Sa>0.0) + { + b += 1; + Gb += 1.0; + Sb += 1.0; + + // CHECK: define internal void [[OFFLOADF]]({{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}) + // CHECK: call void {{.*}}@__kmpc_fork_call(%ident_t* {{.+}}, i32 {{.+}}, void (i32*, i32*, ...)* bitcast ({{.*}}[[PARF:@.+]] to {{.*}}) + + // CHECK: define internal void [[PARF]](i32* noalias %{{.*}}, i32* noalias %{{.*}}, {{.+}}* dereferenceable({{.+}}) %{{.+}}, {{.+}}* dereferenceable({{.+}}) %{{.+}}, {{.+}}* dereferenceable({{.+}}) %{{.+}}) + // Capture d, Gd, Sd + #pragma omp parallel if(Gc>0.0 && c>0 && Sc>0.0) + { + d += 1; + Gd += 1.0; + Sd += 1.0; + } + } + } + return a + b + c + d + (int)Sa + (int)Sb + (int)Sc + (int)Sd; +} + +#endif