Index: include/clang/Sema/Sema.h =================================================================== --- include/clang/Sema/Sema.h +++ include/clang/Sema/Sema.h @@ -7456,6 +7456,10 @@ bool IsOpenMPCapturedVar(VarDecl *VD); public: + /// \brief Check if the specified variable is used in one of the private + /// clauses in OpenMP constructs. + bool isOpenMPPrivateVar(VarDecl *VD); + ExprResult PerformOpenMPImplicitIntegerConversion(SourceLocation OpLoc, Expr *Op); /// \brief Called on start of new data sharing attribute block. Index: lib/CodeGen/CGStmt.cpp =================================================================== --- lib/CodeGen/CGStmt.cpp +++ lib/CodeGen/CGStmt.cpp @@ -2116,12 +2116,15 @@ for (CapturedStmt::capture_init_iterator I = S.capture_init_begin(), E = S.capture_init_end(); I != E; ++I, ++CurField) { - LValue LV = EmitLValueForFieldInitialization(SlotLV, *CurField); - if (CurField->hasCapturedVLAType()) { - auto VAT = CurField->getCapturedVLAType(); - EmitStoreThroughLValue(RValue::get(VLASizeMap[VAT->getSizeExpr()]), LV); - } else { - EmitInitializerForField(*CurField, LV, *I, None); + // Do not emit initialization for OpenMP private variables. + if (CurField->hasCapturedVLAType() || *I) { + LValue LV = EmitLValueForFieldInitialization(SlotLV, *CurField); + if (CurField->hasCapturedVLAType()) { + auto VAT = CurField->getCapturedVLAType(); + EmitStoreThroughLValue(RValue::get(VLASizeMap[VAT->getSizeExpr()]), LV); + } else { + EmitInitializerForField(*CurField, LV, *I, /*ArrayIndexes=*/llvm::None); + } } } Index: lib/Sema/SemaExpr.cpp =================================================================== --- lib/Sema/SemaExpr.cpp +++ lib/Sema/SemaExpr.cpp @@ -12497,7 +12497,7 @@ QualType &DeclRefType, const bool RefersToCapturedVariable, Sema &S) { - + // By default, capture variables by reference. bool ByRef = true; // Using an LValue reference type is consistent with Lambdas (see below). @@ -12509,26 +12509,28 @@ // evaluation will be needed. RecordDecl *RD = RSI->TheRecordDecl; - FieldDecl *Field - = FieldDecl::Create(S.Context, RD, Loc, Loc, nullptr, CaptureType, + FieldDecl *Field = + FieldDecl::Create(S.Context, RD, Loc, Loc, nullptr, CaptureType, S.Context.getTrivialTypeSourceInfo(CaptureType, Loc), nullptr, false, ICIS_NoInit); Field->setImplicit(true); Field->setAccess(AS_private); RD->addDecl(Field); - - CopyExpr = new (S.Context) DeclRefExpr(Var, RefersToCapturedVariable, - DeclRefType, VK_LValue, Loc); + Var->setReferenced(true); Var->markUsed(S.Context); + // Do not pass reference to original variable for variables explicitly + // specified in 'private' clause. + if (!S.getLangOpts().OpenMP || !S.isOpenMPPrivateVar(Var)) { + CopyExpr = new (S.Context) DeclRefExpr(Var, RefersToCapturedVariable, + DeclRefType, VK_LValue, Loc); + } + + // Actually capture the variable. + RSI->addCapture(Var, /*isBlock*/ false, ByRef, RefersToCapturedVariable, + Loc, SourceLocation(), CaptureType, CopyExpr); } - // Actually capture the variable. - if (BuildAndDiagnose) - RSI->addCapture(Var, /*isBlock*/false, ByRef, RefersToCapturedVariable, Loc, - SourceLocation(), CaptureType, CopyExpr); - - return true; } Index: lib/Sema/SemaOpenMP.cpp =================================================================== --- lib/Sema/SemaOpenMP.cpp +++ lib/Sema/SemaOpenMP.cpp @@ -604,6 +604,15 @@ return false; } +bool Sema::isOpenMPPrivateVar(VarDecl *VD) { + assert(LangOpts.OpenMP && "OpenMP is not allowed"); + VD = VD->getCanonicalDecl(); + auto DVarPrivate = DSAStack->getTopDSA(VD, /*FromParent=*/false); + // For tasks we must capture privates also, because codegen for task is + // different rather than for other directives. + return DVarPrivate.CKind == OMPC_private && DVarPrivate.RefExpr; +} + void Sema::DestroyDataSharingAttributesStack() { delete DSAStack; } void Sema::StartOpenMPDSABlock(OpenMPDirectiveKind DKind, Index: test/OpenMP/parallel_private_codegen.cpp =================================================================== --- test/OpenMP/parallel_private_codegen.cpp +++ test/OpenMP/parallel_private_codegen.cpp @@ -44,9 +44,8 @@ // LAMBDA: call{{( x86_thiscallcc)?}} void [[OUTER_LAMBDA:@.+]]( [&]() { // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( - // LAMBDA: [[G_LOCAL_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[AGG_CAPTURED:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 - // LAMBDA: store i{{[0-9]+}}* [[G]], i{{[0-9]+}}** [[G_LOCAL_REF]] - // LAMBDA: [[ARG:%.+]] = bitcast %{{.+}}* [[AGG_CAPTURED]] to i8* + // LAMBDA-NOT: getelementptr inbounds + // LAMBDA: [[ARG:%.+]] = bitcast %{{.+}}* %{{.+}} to i8* // LAMBDA: call void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i8* [[ARG]]) #pragma omp parallel private(g) { @@ -76,9 +75,8 @@ // BLOCKS: call void {{%.+}}(i8 ^{ // BLOCKS: define{{.*}} internal{{.*}} void {{.+}}(i8* - // BLOCKS: [[G_LOCAL_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[AGG_CAPTURED:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 - // BLOCKS: store i{{[0-9]+}}* [[G]], i{{[0-9]+}}** [[G_LOCAL_REF]] - // BLOCKS: [[ARG:%.+]] = bitcast %{{.+}}* [[AGG_CAPTURED]] to i8* + // BLOCKS-NOT: getelementptr inbounds + // BLOCKS: [[ARG:%.+]] = bitcast %{{.+}}* %{{.+}} to i8* // BLOCKS: call void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i8* [[ARG]]) #pragma omp parallel private(g) { Index: test/OpenMP/task_private_codegen.cpp =================================================================== --- test/OpenMP/task_private_codegen.cpp +++ test/OpenMP/task_private_codegen.cpp @@ -136,15 +136,8 @@ // CHECK: call {{.*}} [[S_DOUBLE_TY_DEF_CONSTR:@.+]]([[S_DOUBLE_TY]]* [[TEST]]) -// Store original variables in capture struct. -// CHECK: [[VEC_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: store [2 x i32]* [[VEC_ADDR]], [2 x i32]** [[VEC_REF]], -// CHECK: [[T_VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 1 -// CHECK: store i32* [[T_VAR_ADDR]], i32** [[T_VAR_REF]], -// CHECK: [[S_ARR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 2 -// CHECK: store [2 x [[S_DOUBLE_TY]]]* [[S_ARR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[S_ARR_REF]], -// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 3 -// CHECK: store [[S_DOUBLE_TY]]* [[VAR_ADDR]], [[S_DOUBLE_TY]]** [[VAR_REF]], +// Check there is no store of references to original variables in capture struct. +// CHECK-NOT: getelementptr inbounds [[CAP_MAIN_TY]], // Allocate task. // Returns struct kmp_task_t { @@ -254,15 +247,8 @@ // CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]]) -// Store original variables in capture struct. -// CHECK: [[VEC_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: store [2 x i32]* [[VEC_ADDR]], [2 x i32]** [[VEC_REF]], -// CHECK: [[T_VAR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 1 -// CHECK: store i32* [[T_VAR_ADDR]], i32** [[T_VAR_REF]], -// CHECK: [[S_ARR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 2 -// CHECK: store [2 x [[S_INT_TY]]]* [[S_ARR_ADDR]], [2 x [[S_INT_TY]]]** [[S_ARR_REF]], -// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 3 -// CHECK: store [[S_INT_TY]]* [[VAR_ADDR]], [[S_INT_TY]]** [[VAR_REF]], +// Check there is no store of references to original variables in capture struct. +// CHECK-NOT: getelementptr inbounds [[CAP_TMAIN_TY]], // Allocate task. // Returns struct kmp_task_t {