Index: include/clang/Sema/Sema.h =================================================================== --- include/clang/Sema/Sema.h +++ include/clang/Sema/Sema.h @@ -7454,6 +7454,10 @@ ExprResult VerifyPositiveIntegerConstantInClause(Expr *Op, OpenMPClauseKind CKind); public: + /// \brief Checks if the specified variable is used in one of the private + /// clauses in OpenMP constructs. + bool IsOpenMPPrivateVar(VarDecl *VD); + ExprResult PerformOpenMPImplicitIntegerConversion(SourceLocation OpLoc, Expr *Op); /// \brief Called on start of new data sharing attribute block. Index: lib/CodeGen/CGExpr.cpp =================================================================== --- lib/CodeGen/CGExpr.cpp +++ lib/CodeGen/CGExpr.cpp @@ -1897,6 +1897,23 @@ QualType T = E->getType(); if (const auto *VD = dyn_cast(ND)) { + // Check for captured globals. + if (!VD->hasLocalStorage()) { + if (FieldDecl *FD = LambdaCaptureFields.lookup(VD)) + return EmitCapturedFieldLValue(*this, FD, CXXABIThisValue); + else if (CapturedStmtInfo) { + if (const FieldDecl *FD = CapturedStmtInfo->lookup(VD)) { + if (llvm::Value *V = LocalDeclMap.lookup(VD)) + return MakeAddrLValue(V, T, Alignment); + else + return EmitCapturedFieldLValue(*this, FD, + CapturedStmtInfo->getContextValue()); + } + } else if (auto *BD = dyn_cast_or_null(CurCodeDecl)) + if (BD->capturesVariable(VD)) + return MakeAddrLValue( + GetAddrOfBlockDecl(VD, VD->hasAttr()), T, Alignment); + } // Global Named registers access via intrinsics only if (VD->getStorageClass() == SC_Register && VD->hasAttr() && !VD->isLocalVarDecl()) Index: lib/CodeGen/CodeGenFunction.h =================================================================== --- lib/CodeGen/CodeGenFunction.h +++ lib/CodeGen/CodeGenFunction.h @@ -614,7 +614,6 @@ addPrivate(const VarDecl *LocalVD, const std::function &PrivateGen) { assert(PerformCleanup && "adding private to dead scope"); - assert(LocalVD->isLocalVarDecl() && "privatizing non-local variable"); if (SavedLocals.count(LocalVD) > 0) return false; SavedLocals[LocalVD] = CGF.LocalDeclMap.lookup(LocalVD); CGF.LocalDeclMap.erase(LocalVD); Index: lib/Sema/SemaExpr.cpp =================================================================== --- lib/Sema/SemaExpr.cpp +++ lib/Sema/SemaExpr.cpp @@ -11654,7 +11654,7 @@ const bool Diagnose, Sema &S) { if (isa(DC) || isa(DC) || isLambdaCallOperator(DC)) return getLambdaAwareParentOfDeclContext(DC); - else { + else if (Var->hasLocalStorage()) { if (Diagnose) diagnoseUncapturableValueReference(S, Loc, Var, DC); } @@ -12090,6 +12090,10 @@ return true; } +static bool NeedToCaptureGlobalVariable(Sema &S, VarDecl *VD) { + return S.IsOpenMPPrivateVar(VD); +} + bool Sema::tryCaptureVariable(VarDecl *Var, SourceLocation ExprLoc, TryCaptureKind Kind, SourceLocation EllipsisLoc, bool BuildAndDiagnose, @@ -12115,7 +12119,8 @@ // If the variable is declared in the current context (and is not an // init-capture), there is no need to capture it. if (!Var->isInitCapture() && Var->getDeclContext() == DC) return true; - if (!Var->hasLocalStorage()) return true; + if (!Var->hasLocalStorage() && !NeedToCaptureGlobalVariable(*this, Var)) + return true; // Walk up the stack to determine whether we can capture the variable, // performing the "simple" checks that don't depend on type. We stop when @@ -12136,8 +12141,14 @@ ExprLoc, BuildAndDiagnose, *this); - if (!ParentDC) return true; - + if (!ParentDC) { + if (!Var->hasLocalStorage()) { + FunctionScopesIndex = MaxFunctionScopesIndex - 1; + break; + } + return true; + } + FunctionScopeInfo *FSI = FunctionScopes[FunctionScopesIndex]; CapturingScopeInfo *CSI = cast(FSI); Index: lib/Sema/SemaOpenMP.cpp =================================================================== --- lib/Sema/SemaOpenMP.cpp +++ lib/Sema/SemaOpenMP.cpp @@ -551,6 +551,18 @@ #define DSAStack static_cast(VarDataSharingAttributesStack) +bool Sema::IsOpenMPPrivateVar(VarDecl *VD) { + if (LangOpts.OpenMP && DSAStack->getCurrentDirective() != OMPD_unknown) { + auto DVarPrivate = DSAStack->getTopDSA(VD, /*FromParent=*/false); + if (DVarPrivate.CKind != OMPC_unknown && isOpenMPPrivate(DVarPrivate.CKind)) + return true; + DVarPrivate = DSAStack->hasDSA(VD, isOpenMPPrivate, MatchesAlways(), + /*FromParent=*/false); + return DVarPrivate.CKind != OMPC_unknown; + } + return false; +} + void Sema::DestroyDataSharingAttributesStack() { delete DSAStack; } void Sema::StartOpenMPDSABlock(OpenMPDirectiveKind DKind, Index: test/OpenMP/parallel_firstprivate_codegen.cpp =================================================================== --- test/OpenMP/parallel_firstprivate_codegen.cpp +++ test/OpenMP/parallel_firstprivate_codegen.cpp @@ -1,6 +1,8 @@ // RUN: %clang_cc1 -verify -fopenmp=libiomp5 -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s // RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp=libiomp5 -x c++ -std=c++11 -DLAMBDA -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s +// RUN: %clang_cc1 -verify -fopenmp=libiomp5 -x c++ -fblocks -DBLOCKS -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s // expected-no-diagnostics #ifndef HEADER #define HEADER @@ -12,7 +14,7 @@ ~St() {} }; -volatile int g; +volatile int g = 1212; template struct S { @@ -47,6 +49,83 @@ } int main() { +#ifdef LAMBDA + // LAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212, + // LAMBDA-LABEL: @main + // LAMBDA: call void [[OUTER_LAMBDA:@.+]]( + [&]() { + // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( + // LAMBDA: [[G_LOCAL_REF:%.+]] = getelementptr inbounds %{{.+}}* [[AGG_CAPTURED:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: store i{{[0-9]+}}* [[G]], i{{[0-9]+}}** [[G_LOCAL_REF]] + // LAMBDA: [[ARG:%.+]] = bitcast %{{.+}}* [[AGG_CAPTURED]] to i8* + // LAMBDA: call void {{.+}}* @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i8* [[ARG]]) +#pragma omp parallel firstprivate(g) + { + // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* %{{.+}}, i32* %{{.+}}, %{{.+}}* [[ARG:%.+]]) + // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: store %{{.+}}* [[ARG]], %{{.+}}** [[ARG_REF:%.+]], + // LAMBDA: [[ARG:%.+]] = load %{{.+}}** [[ARG_REF]] + // LAMBDA: [[G_REF_ADDR:%.+]] = getelementptr inbounds %{{.+}}* [[ARG]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}** [[G_REF_ADDR]] + // LAMBDA: [[G_VAL:%.+]] = load volatile i{{[0-9]+}}* [[G_REF]] + // LAMBDA: store volatile i{{[0-9]+}} [[G_VAL]], i{{[0-9]+}}* [[G_PRIVATE_ADDR]] + // LAMBDA: call void @__kmpc_barrier( + g = 1; + // LAMBDA: store volatile i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]], + // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: store i{{[0-9]+}}* [[G_PRIVATE_ADDR]], i{{[0-9]+}}** [[G_PRIVATE_ADDR_REF]] + // LAMBDA: call void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]]) + [&]() { + // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]]) + // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], + g = 2; + // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}** [[ARG_PTR_REF]] + // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}** [[G_PTR_REF]] + // LAMBDA: store volatile i{{[0-9]+}} 2, i{{[0-9]+}}* [[G_REF]] + }(); + } + }(); + return 0; +#elif defined(BLOCKS) + // BLOCKS: [[G:@.+]] = global i{{[0-9]+}} 1212, + // BLOCKS-LABEL: @main + // BLOCKS: call void {{%.+}}(i8* + ^{ + // BLOCKS: define{{.*}} internal{{.*}} void {{.+}}(i8* + // BLOCKS: [[G_LOCAL_REF:%.+]] = getelementptr inbounds %{{.+}}* [[AGG_CAPTURED:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // BLOCKS: store i{{[0-9]+}}* [[G]], i{{[0-9]+}}** [[G_LOCAL_REF]] + // BLOCKS: [[ARG:%.+]] = bitcast %{{.+}}* [[AGG_CAPTURED]] to i8* + // BLOCKS: call void {{.+}}* @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i8* [[ARG]]) +#pragma omp parallel firstprivate(g) + { + // BLOCKS: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* %{{.+}}, i32* %{{.+}}, %{{.+}}* [[ARG:%.+]]) + // BLOCKS: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}, + // BLOCKS: store %{{.+}}* [[ARG]], %{{.+}}** [[ARG_REF:%.+]], + // BLOCKS: [[ARG:%.+]] = load %{{.+}}** [[ARG_REF]] + // BLOCKS: [[G_REF_ADDR:%.+]] = getelementptr inbounds %{{.+}}* [[ARG]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // BLOCKS: [[G_REF:%.+]] = load i{{[0-9]+}}** [[G_REF_ADDR]] + // BLOCKS: [[G_VAL:%.+]] = load volatile i{{[0-9]+}}* [[G_REF]] + // BLOCKS: store volatile i{{[0-9]+}} [[G_VAL]], i{{[0-9]+}}* [[G_PRIVATE_ADDR]] + // BLOCKS: call void @__kmpc_barrier( + g = 1; + // BLOCKS: store volatile i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]], + // BLOCKS-NOT: [[G]]{{[[^:word:]]}} + // BLOCKS: i{{[0-9]+}}* [[G_PRIVATE_ADDR]] + // BLOCKS-NOT: [[G]]{{[[^:word:]]}} + // BLOCKS: call void {{%.+}}(i8* + ^{ + // BLOCKS: define {{.+}} void {{@.+}}(i8* + g = 2; + // BLOCKS-NOT: [[G]]{{[[^:word:]]}} + // BLOCKS: store volatile i{{[0-9]+}} 2, i{{[0-9]+}}* + // BLOCKS-NOT: [[G]]{{[[^:word:]]}} + // BLOCKS: ret + }(); + } + }(); + return 0; +#else S test; int t_var = 0; int vec[] = {1, 2}; @@ -58,6 +137,7 @@ s_arr[0] = var; } return tmain(); +#endif } // CHECK: define {{.*}}i{{[0-9]+}} @main() Index: test/OpenMP/parallel_private_codegen.cpp =================================================================== --- test/OpenMP/parallel_private_codegen.cpp +++ test/OpenMP/parallel_private_codegen.cpp @@ -1,6 +1,8 @@ // RUN: %clang_cc1 -verify -fopenmp=libiomp5 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s // RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -triple x86_64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp=libiomp5 -x c++ -std=c++11 -DLAMBDA -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s +// RUN: %clang_cc1 -verify -fopenmp=libiomp5 -x c++ -fblocks -DBLOCKS -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s // expected-no-diagnostics #ifndef HEADER #define HEADER @@ -14,6 +16,8 @@ ~S() {} }; +volatile int g = 1212; + // CHECK: [[S_FLOAT_TY:%.+]] = type { float } // CHECK: [[CAP_MAIN_TY:%.+]] = type { [2 x i{{[0-9]+}}]*, i{{[0-9]+}}*, [2 x [[S_FLOAT_TY]]]*, [[S_FLOAT_TY]]* } // CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } @@ -22,7 +26,7 @@ template T tmain() { S test; - T t_var; + T t_var = T(); T vec[] = {1, 2}; S s_arr[] = {1, 2}; S var(3); @@ -35,8 +39,75 @@ } int main() { +#ifdef LAMBDA + // LAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212, + // LAMBDA-LABEL: @main + // LAMBDA: call void [[OUTER_LAMBDA:@.+]]( + [&]() { + // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( + // LAMBDA: [[G_LOCAL_REF:%.+]] = getelementptr inbounds %{{.+}}* [[AGG_CAPTURED:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: store i{{[0-9]+}}* [[G]], i{{[0-9]+}}** [[G_LOCAL_REF]] + // LAMBDA: [[ARG:%.+]] = bitcast %{{.+}}* [[AGG_CAPTURED]] to i8* + // LAMBDA: call void {{.+}}* @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i8* [[ARG]]) +#pragma omp parallel private(g) + { + // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* %{{.+}}, i32* %{{.+}}, %{{.+}}* [[ARG:%.+]]) + // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: store %{{.+}}* [[ARG]], %{{.+}}** [[ARG_REF:%.+]], + // LAMBDA: call void @__kmpc_barrier( + g = 1; + // LAMBDA: store volatile i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]], + // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: store i{{[0-9]+}}* [[G_PRIVATE_ADDR]], i{{[0-9]+}}** [[G_PRIVATE_ADDR_REF]] + // LAMBDA: call void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]]) + [&]() { + // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]]) + // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], + g = 2; + // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}** [[ARG_PTR_REF]] + // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}** [[G_PTR_REF]] + // LAMBDA: store volatile i{{[0-9]+}} 2, i{{[0-9]+}}* [[G_REF]] + }(); + } + }(); + return 0; +#elif defined(BLOCKS) + // BLOCKS: [[G:@.+]] = global i{{[0-9]+}} 1212, + // BLOCKS-LABEL: @main + // BLOCKS: call void {{%.+}}(i8* + ^{ + // BLOCKS: define{{.*}} internal{{.*}} void {{.+}}(i8* + // BLOCKS: [[G_LOCAL_REF:%.+]] = getelementptr inbounds %{{.+}}* [[AGG_CAPTURED:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // BLOCKS: store i{{[0-9]+}}* [[G]], i{{[0-9]+}}** [[G_LOCAL_REF]] + // BLOCKS: [[ARG:%.+]] = bitcast %{{.+}}* [[AGG_CAPTURED]] to i8* + // BLOCKS: call void {{.+}}* @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i8* [[ARG]]) +#pragma omp parallel private(g) + { + // BLOCKS: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* %{{.+}}, i32* %{{.+}}, %{{.+}}* [[ARG:%.+]]) + // BLOCKS: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}, + // BLOCKS: store %{{.+}}* [[ARG]], %{{.+}}** [[ARG_REF:%.+]], + // BLOCKS: call void @__kmpc_barrier( + g = 1; + // BLOCKS: store volatile i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]], + // BLOCKS-NOT: [[G]]{{[[^:word:]]}} + // BLOCKS: i{{[0-9]+}}* [[G_PRIVATE_ADDR]] + // BLOCKS-NOT: [[G]]{{[[^:word:]]}} + // BLOCKS: call void {{%.+}}(i8* + ^{ + // BLOCKS: define {{.+}} void {{@.+}}(i8* + g = 2; + // BLOCKS-NOT: [[G]]{{[[^:word:]]}} + // BLOCKS: store volatile i{{[0-9]+}} 2, i{{[0-9]+}}* + // BLOCKS-NOT: [[G]]{{[[^:word:]]}} + // BLOCKS: ret + }(); + } + }(); + return 0; +#else S test; - int t_var; + int t_var = 0; int vec[] = {1, 2}; S s_arr[] = {1, 2}; S var(3); @@ -46,6 +117,7 @@ s_arr[0] = var; } return tmain(); +#endif } // CHECK: define i{{[0-9]+}} @main()