Index: lib/CodeGen/CGOpenMPRuntime.h =================================================================== --- lib/CodeGen/CGOpenMPRuntime.h +++ lib/CodeGen/CGOpenMPRuntime.h @@ -350,13 +350,16 @@ /// \brief Emits a single region. /// \param SingleOpGen Generator for the statement associated with the given /// single region. - virtual void emitSingleRegion(CodeGenFunction &CGF, - const std::function &SingleOpGen, - SourceLocation Loc, - ArrayRef CopyprivateVars, - ArrayRef SrcExprs, - ArrayRef DstExprs, - ArrayRef AssignmentOps); + virtual void emitSingleRegion( + CodeGenFunction &CGF, const std::function &SingleOpGen, + SourceLocation Loc, ArrayRef CopyprivateVars, + ArrayRef DestExprs, ArrayRef SrcExprs, + ArrayRef AssignmentOps, + const llvm::function_ref< + void(CodeGenFunction & /*CGF*/, QualType /*OriginalType*/, + llvm::Value * /*DestAddr*/, llvm::Value * /*SrcAddr*/, + const VarDecl * /*DestVD*/, const VarDecl * /*SrcVD*/, + const Expr * /*AssignOp*/)> &SingleCopyprivateCodeGen); /// \brief Emit an implicit/explicit barrier for OpenMP threads. /// \param Kind Directive for which this implicit barrier call must be Index: lib/CodeGen/CGOpenMPRuntime.cpp =================================================================== --- lib/CodeGen/CGOpenMPRuntime.cpp +++ lib/CodeGen/CGOpenMPRuntime.cpp @@ -981,8 +981,14 @@ } static llvm::Value *emitCopyprivateCopyFunction( - CodeGenModule &CGM, llvm::Type *ArgsType, ArrayRef SrcExprs, - ArrayRef DstExprs, ArrayRef AssignmentOps) { + CodeGenModule &CGM, llvm::Type *ArgsType, + ArrayRef CopyprivateVars, ArrayRef DestExprs, + ArrayRef SrcExprs, ArrayRef AssignmentOps, + const llvm::function_ref< + void(CodeGenFunction & /*CGF*/, QualType /*OriginalType*/, + llvm::Value * /*DestAddr*/, llvm::Value * /*SrcAddr*/, + const VarDecl * /*DestVD*/, const VarDecl * /*SrcVD*/, + const Expr * /*AssignOp*/)> &SingleCopyprivateCodeGen) { auto &C = CGM.getContext(); // void copy_func(void *LHSArg, void *RHSArg); FunctionArgList Args; @@ -1001,7 +1007,7 @@ CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, CGFI, Fn); CodeGenFunction CGF(CGM); CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); - // Dst = (void*[n])(LHSArg); + // Dest = (void*[n])(LHSArg); // Src = (void*[n])(RHSArg); auto *LHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&LHSArg), @@ -1015,43 +1021,37 @@ // *(Type1*)Dst[1] = *(Type1*)Src[1]; // ... // *(Typen*)Dst[n] = *(Typen*)Src[n]; - CodeGenFunction::OMPPrivateScope Scope(CGF); for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { - Scope.addPrivate( + auto *DestAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + CGF.Builder.CreateAlignedLoad( + CGF.Builder.CreateStructGEP(nullptr, LHS, I), + CGM.PointerAlignInBytes), + CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType()))); + auto *SrcAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + CGF.Builder.CreateAlignedLoad( + CGF.Builder.CreateStructGEP(nullptr, RHS, I), + CGM.PointerAlignInBytes), + CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType()))); + SingleCopyprivateCodeGen( + CGF, CopyprivateVars[I]->getType(), DestAddr, SrcAddr, + cast(cast(DestExprs[I])->getDecl()), cast(cast(SrcExprs[I])->getDecl()), - [&]() -> llvm::Value *{ - return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - CGF.Builder.CreateAlignedLoad( - CGF.Builder.CreateStructGEP(nullptr, RHS, I), - CGM.PointerAlignInBytes), - CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType()))); - }); - Scope.addPrivate( - cast(cast(DstExprs[I])->getDecl()), - [&]() -> llvm::Value *{ - return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - CGF.Builder.CreateAlignedLoad( - CGF.Builder.CreateStructGEP(nullptr, LHS, I), - CGM.PointerAlignInBytes), - CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType()))); - }); - } - Scope.Privatize(); - for (auto *E : AssignmentOps) { - CGF.EmitIgnoredExpr(E); + AssignmentOps[I]); } - Scope.ForceCleanup(); CGF.FinishFunction(); return Fn; } -void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, - const std::function &SingleOpGen, - SourceLocation Loc, - ArrayRef CopyprivateVars, - ArrayRef SrcExprs, - ArrayRef DstExprs, - ArrayRef AssignmentOps) { +void CGOpenMPRuntime::emitSingleRegion( + CodeGenFunction &CGF, const std::function &SingleOpGen, + SourceLocation Loc, ArrayRef CopyprivateVars, + ArrayRef SrcExprs, ArrayRef DstExprs, + ArrayRef AssignmentOps, + const llvm::function_ref< + void(CodeGenFunction & /*CGF*/, QualType /*OriginalType*/, + llvm::Value * /*DestAddr*/, llvm::Value * /*SrcAddr*/, + const VarDecl * /*DestVD*/, const VarDecl * /*SrcVD*/, + const Expr * /*AssignOp*/)> &SingleCopyprivateCodeGen) { assert(CopyprivateVars.size() == SrcExprs.size() && CopyprivateVars.size() == DstExprs.size() && CopyprivateVars.size() == AssignmentOps.size()); @@ -1121,7 +1121,8 @@ // threads in the corresponding parallel region. auto *CpyFn = emitCopyprivateCopyFunction( CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), - SrcExprs, DstExprs, AssignmentOps); + CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, + SingleCopyprivateCodeGen); auto *BufSize = CGF.Builder.getInt32( C.getTypeSizeInChars(CopyprivateArrayTy).getQuantity()); auto *CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, Index: lib/CodeGen/CGStmtOpenMP.cpp =================================================================== --- lib/CodeGen/CGStmtOpenMP.cpp +++ lib/CodeGen/CGStmtOpenMP.cpp @@ -84,72 +84,52 @@ CGF.EmitBlock(ContBlock, /*IsFinished*/ true); } -void CodeGenFunction::EmitOMPAggregateAssign(LValue OriginalAddr, - llvm::Value *PrivateAddr, - const Expr *AssignExpr, - QualType OriginalType, - const VarDecl *VDInit) { - EmitBlock(createBasicBlock(".omp.assign.begin.")); - if (!isa(AssignExpr) || isTrivialInitializer(AssignExpr)) { - // Perform simple memcpy. - EmitAggregateAssign(PrivateAddr, OriginalAddr.getAddress(), - AssignExpr->getType()); - } else { - // Perform element-by-element initialization. - QualType ElementTy; - auto SrcBegin = OriginalAddr.getAddress(); - auto DestBegin = PrivateAddr; - auto ArrayTy = OriginalType->getAsArrayTypeUnsafe(); - auto SrcNumElements = emitArrayLength(ArrayTy, ElementTy, SrcBegin); - auto DestNumElements = emitArrayLength(ArrayTy, ElementTy, DestBegin); - auto SrcEnd = Builder.CreateGEP(SrcBegin, SrcNumElements); - auto DestEnd = Builder.CreateGEP(DestBegin, DestNumElements); - // The basic structure here is a do-while loop, because we don't - // need to check for the zero-element case. - auto BodyBB = createBasicBlock("omp.arraycpy.body"); - auto DoneBB = createBasicBlock("omp.arraycpy.done"); - auto IsEmpty = - Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty"); - Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); - - // Enter the loop body, making that address the current address. - auto EntryBB = Builder.GetInsertBlock(); - EmitBlock(BodyBB); - auto SrcElementPast = Builder.CreatePHI(SrcBegin->getType(), 2, - "omp.arraycpy.srcElementPast"); - SrcElementPast->addIncoming(SrcEnd, EntryBB); - auto DestElementPast = Builder.CreatePHI(DestBegin->getType(), 2, - "omp.arraycpy.destElementPast"); - DestElementPast->addIncoming(DestEnd, EntryBB); - - // Shift the address back by one element. - auto NegativeOne = llvm::ConstantInt::get(SizeTy, -1, true); - auto DestElement = Builder.CreateGEP(DestElementPast, NegativeOne, - "omp.arraycpy.dest.element"); - auto SrcElement = Builder.CreateGEP(SrcElementPast, NegativeOne, - "omp.arraycpy.src.element"); - { - // Create RunCleanScope to cleanup possible temps. - CodeGenFunction::RunCleanupsScope Init(*this); - // Emit initialization for single element. - LocalDeclMap[VDInit] = SrcElement; - EmitAnyExprToMem(AssignExpr, DestElement, - AssignExpr->getType().getQualifiers(), - /*IsInitializer*/ false); - LocalDeclMap.erase(VDInit); - } +void CodeGenFunction::EmitOMPAggregateAssign( + llvm::Value *DestAddr, llvm::Value *SrcAddr, QualType OriginalType, + const llvm::function_ref &CopyGen) { + // Perform element-by-element initialization. + QualType ElementTy; + auto SrcBegin = SrcAddr; + auto DestBegin = DestAddr; + auto ArrayTy = OriginalType->getAsArrayTypeUnsafe(); + auto SrcNumElements = emitArrayLength(ArrayTy, ElementTy, SrcBegin); + auto DestNumElements = emitArrayLength(ArrayTy, ElementTy, DestBegin); + auto SrcEnd = Builder.CreateGEP(SrcBegin, SrcNumElements); + auto DestEnd = Builder.CreateGEP(DestBegin, DestNumElements); + // The basic structure here is a do-while loop, because we don't + // need to check for the zero-element case. + auto BodyBB = createBasicBlock("omp.arraycpy.body"); + auto DoneBB = createBasicBlock("omp.arraycpy.done"); + auto IsEmpty = + Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty"); + Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); + + // Enter the loop body, making that address the current address. + auto EntryBB = Builder.GetInsertBlock(); + EmitBlock(BodyBB); + auto SrcElementPast = + Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast"); + SrcElementPast->addIncoming(SrcEnd, EntryBB); + auto DestElementPast = Builder.CreatePHI(DestBegin->getType(), 2, + "omp.arraycpy.destElementPast"); + DestElementPast->addIncoming(DestEnd, EntryBB); + + // Shift the address back by one element. + auto NegativeOne = llvm::ConstantInt::get(SizeTy, -1, true); + auto DestElement = Builder.CreateGEP(DestElementPast, NegativeOne, + "omp.arraycpy.dest.element"); + auto SrcElement = Builder.CreateGEP(SrcElementPast, NegativeOne, + "omp.arraycpy.src.element"); + // Emit copy. + CopyGen(DestElement, SrcElement); + // Check whether we've reached the end. + auto Done = Builder.CreateICmpEQ(DestElement, DestBegin, "omp.arraycpy.done"); + Builder.CreateCondBr(Done, DoneBB, BodyBB); + DestElementPast->addIncoming(DestElement, Builder.GetInsertBlock()); + SrcElementPast->addIncoming(SrcElement, Builder.GetInsertBlock()); - // Check whether we've reached the end. - auto Done = - Builder.CreateICmpEQ(DestElement, DestBegin, "omp.arraycpy.done"); - Builder.CreateCondBr(Done, DoneBB, BodyBB); - DestElementPast->addIncoming(DestElement, Builder.GetInsertBlock()); - SrcElementPast->addIncoming(SrcElement, Builder.GetInsertBlock()); - - // Done. - EmitBlock(DoneBB, true); - } - EmitBlock(createBasicBlock(".omp.assign.end.")); + // Done. + EmitBlock(DoneBB, true); } void CodeGenFunction::EmitOMPFirstprivateClause( @@ -173,13 +153,33 @@ LValue Base = MakeNaturalAlignAddrLValue( CapturedStmtInfo->getContextValue(), getContext().getTagDeclType(FD->getParent())); - auto OriginalAddr = EmitLValueForField(Base, FD); + auto *OriginalAddr = EmitLValueForField(Base, FD).getAddress(); auto VDInit = cast(cast(*InitsRef)->getDecl()); IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * { auto Emission = EmitAutoVarAlloca(*VD); // Emit initialization of aggregate firstprivate vars. - EmitOMPAggregateAssign(OriginalAddr, Emission.getAllocatedAddress(), - VD->getInit(), (*IRef)->getType(), VDInit); + auto *AssignExpr = VD->getInit(); + if (!isa(AssignExpr) || + isTrivialInitializer(AssignExpr)) { + // Perform simple memcpy. + EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr, + (*IRef)->getType()); + } else { + EmitOMPAggregateAssign( + Emission.getAllocatedAddress(), OriginalAddr, + (*IRef)->getType(), + [this, VDInit, AssignExpr](llvm::Value *DestElement, + llvm::Value *SrcElement) { + // Create RunCleanScope to cleanup possible temps. + RunCleanupsScope Init(*this); + // Emit initialization for single element. + LocalDeclMap[VDInit] = SrcElement; + EmitAnyExprToMem(AssignExpr, DestElement, + AssignExpr->getType().getQualifiers(), + /*IsInitializer*/ false); + LocalDeclMap.erase(VDInit); + }); + } EmitAutoVarCleanups(Emission); return Emission.getAllocatedAddress(); }); @@ -803,6 +803,46 @@ return LVal; } +static void EmitCopyAssignment(CodeGenFunction &CGF, QualType OriginalType, + llvm::Value *DestAddr, llvm::Value *SrcAddr, + const VarDecl *DestVD, const VarDecl *SrcVD, + const Expr *AssignOp) { + if (OriginalType->isArrayType()) { + auto *BO = dyn_cast(AssignOp); + if (BO && BO->getOpcode() == BO_Assign) { + // Perform simple memcpy for simple copying. + CGF.EmitAggregateAssign(DestAddr, SrcAddr, OriginalType); + } else { + // For arrays with complex element types perform element by element + // copying. + CGF.EmitOMPAggregateAssign( + DestAddr, SrcAddr, OriginalType, + [&CGF, AssignOp, SrcVD, DestVD](llvm::Value *DestElement, + llvm::Value *SrcElement) { + // Working with the single array element, so have to remap + // destination and source variables to corresponding array + // elements. + CodeGenFunction::OMPPrivateScope Remap(CGF); + Remap.addPrivate(DestVD, [DestElement]() -> llvm::Value *{ + return DestElement; + }); + Remap.addPrivate( + SrcVD, [SrcElement]() -> llvm::Value *{ return SrcElement; }); + (void)Remap.Privatize(); + CGF.EmitIgnoredExpr(AssignOp); + }); + } + } else { + // Remap pseudo source variable to private copy. + CodeGenFunction::OMPPrivateScope Remap(CGF); + Remap.addPrivate(SrcVD, [SrcAddr]() -> llvm::Value *{ return SrcAddr; }); + Remap.addPrivate(DestVD, [DestAddr]() -> llvm::Value *{ return DestAddr; }); + (void)Remap.Privatize(); + // Emit copying of the whole variable. + CGF.EmitIgnoredExpr(AssignOp); + } +} + void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { InlinedOpenMPRegionScopeRAII Region(*this, S); @@ -879,11 +919,15 @@ } else { // If only one section is found - no need to generate loop, emit as a single // region. - CGM.getOpenMPRuntime().emitSingleRegion(*this, [&]() -> void { - InlinedOpenMPRegionScopeRAII Region(*this, S); - EmitStmt(Stmt); - EnsureInsertPoint(); - }, S.getLocStart(), llvm::None, llvm::None, llvm::None, llvm::None); + CGM.getOpenMPRuntime().emitSingleRegion( + *this, + [&]() -> void { + InlinedOpenMPRegionScopeRAII Region(*this, S); + EmitStmt(Stmt); + EnsureInsertPoint(); + }, + S.getLocStart(), llvm::None, llvm::None, llvm::None, llvm::None, + EmitCopyAssignment); } // Emit an implicit barrier at the end. @@ -903,7 +947,7 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { llvm::SmallVector CopyprivateVars; llvm::SmallVector SrcExprs; - llvm::SmallVector DstExprs; + llvm::SmallVector DestExprs; llvm::SmallVector AssignmentOps; // Check if there are any 'copyprivate' clauses associated with this 'single' // construct. @@ -918,17 +962,21 @@ auto *C = cast(*I); CopyprivateVars.append(C->varlists().begin(), C->varlists().end()); SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end()); - DstExprs.append(C->destination_exprs().begin(), - C->destination_exprs().end()); + DestExprs.append(C->destination_exprs().begin(), + C->destination_exprs().end()); AssignmentOps.append(C->assignment_ops().begin(), C->assignment_ops().end()); } // Emit code for 'single' region along with 'copyprivate' clauses - CGM.getOpenMPRuntime().emitSingleRegion(*this, [&]() -> void { - InlinedOpenMPRegionScopeRAII Region(*this, S); - EmitStmt(cast(S.getAssociatedStmt())->getCapturedStmt()); - EnsureInsertPoint(); - }, S.getLocStart(), CopyprivateVars, SrcExprs, DstExprs, AssignmentOps); + CGM.getOpenMPRuntime().emitSingleRegion( + *this, + [&]() -> void { + InlinedOpenMPRegionScopeRAII Region(*this, S); + EmitStmt(cast(S.getAssociatedStmt())->getCapturedStmt()); + EnsureInsertPoint(); + }, + S.getLocStart(), CopyprivateVars, DestExprs, SrcExprs, AssignmentOps, + EmitCopyAssignment); // Emit an implicit barrier at the end. if (!S.getSingleClause(OMPC_nowait)) { CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_single); Index: lib/CodeGen/CodeGenFunction.h =================================================================== --- lib/CodeGen/CodeGenFunction.h +++ lib/CodeGen/CodeGenFunction.h @@ -2026,9 +2026,18 @@ llvm::Function *GenerateCapturedStmtFunctionEpilog(const CapturedStmt &S); llvm::Function *GenerateCapturedStmtFunction(const CapturedStmt &S); llvm::Value *GenerateCapturedStmtArgument(const CapturedStmt &S); - void EmitOMPAggregateAssign(LValue OriginalAddr, llvm::Value *PrivateAddr, - const Expr *AssignExpr, QualType Type, - const VarDecl *VDInit); + /// \brief Perform element by element copying of arrays with type \a + /// OriginalType from \a SrcAddr to \a DestAddr using copying procedure + /// generated by \a CopyGen. + /// + /// \param DestAddr Address of the destination array. + /// \param SrcAddr Address of the source array. + /// \param OriginalType Type of destination and source arrays. + /// \param CopyGen Copying procedure that copies value of single array element + /// to another single array element. + void EmitOMPAggregateAssign( + llvm::Value *DestAddr, llvm::Value *SrcAddr, QualType OriginalType, + const llvm::function_ref &CopyGen); void EmitOMPFirstprivateClause(const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope); void EmitOMPPrivateClause(const OMPExecutableDirective &D, Index: lib/Sema/SemaOpenMP.cpp =================================================================== --- lib/Sema/SemaOpenMP.cpp +++ lib/Sema/SemaOpenMP.cpp @@ -5944,14 +5944,15 @@ // A variable of class type (or array thereof) that appears in a // copyin clause requires an accessible, unambiguous copy assignment // operator for the class type. - auto *SrcVD = BuildVarDecl(*this, DE->getLocStart(), VD->getType(), - ".copyprivate.src"); - auto *PseudoSrcExpr = BuildDeclRefExpr(SrcVD, DE->getType(), VK_LValue, - DE->getExprLoc()).get(); - auto *DstVD = BuildVarDecl(*this, DE->getLocStart(), VD->getType(), - ".copyprivate.dst"); - auto *PseudoDstExpr = BuildDeclRefExpr(DstVD, DE->getType(), VK_LValue, - DE->getExprLoc()).get(); + Type = Context.getBaseElementType(Type).getUnqualifiedType(); + auto *SrcVD = + BuildVarDecl(*this, DE->getLocStart(), Type, ".copyprivate.src"); + auto *PseudoSrcExpr = + BuildDeclRefExpr(SrcVD, Type, VK_LValue, DE->getExprLoc()).get(); + auto *DstVD = + BuildVarDecl(*this, DE->getLocStart(), Type, ".copyprivate.dst"); + auto *PseudoDstExpr = + BuildDeclRefExpr(DstVD, Type, VK_LValue, DE->getExprLoc()).get(); auto AssignmentOp = BuildBinOp(/*S=*/nullptr, DE->getExprLoc(), BO_Assign, PseudoDstExpr, PseudoSrcExpr); if (AssignmentOp.isInvalid()) Index: test/OpenMP/parallel_firstprivate_codegen.cpp =================================================================== --- test/OpenMP/parallel_firstprivate_codegen.cpp +++ test/OpenMP/parallel_firstprivate_codegen.cpp @@ -165,17 +165,11 @@ // CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_PRIV]], // CHECK: [[VEC_PTR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[VEC_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_PTR_REF:%.+]], -// CHECK: br label %[[VEC_PRIV_INIT:.+]] -// CHECK: [[VEC_PRIV_INIT]] // CHECK: [[VEC_DEST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* // CHECK: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_REF]] to i8* // CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST]], i8* [[VEC_SRC]], -// CHECK: br label %[[VEC_PRIV_INIT_END:.+]] -// CHECK: [[VEC_PRIV_INIT_END]] // CHECK: [[S_ARR_REF_PTR:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 2 // CHECK: [[S_ARR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_REF_PTR]], -// CHECK: br label %[[S_ARR_PRIV_INIT:.+]] -// CHECK: [[S_ARR_PRIV_INIT]] // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[S_ARR_END:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 @@ -187,8 +181,6 @@ // CHECK: call {{.*}} [[S_FLOAT_TY_COPY_CONSTR:@.+]]([[S_FLOAT_TY]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}}, [[ST_TY]]* [[ST_TY_TEMP]]) // CHECK: call {{.*}} [[ST_TY_DESTR:@.+]]([[ST_TY]]* [[ST_TY_TEMP]]) // CHECK: br i1 {{.+}}, label %{{.+}}, label %[[S_ARR_BODY]] -// CHECK: br label %[[S_ARR_PRIV_INIT_END:.+]] -// CHECK: [[S_ARR_PRIV_INIT_END]] // CHECK: [[VAR_REF_PTR:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 3 // CHECK: [[VAR_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[VAR_REF_PTR]], // CHECK: call {{.*}} [[ST_TY_DEFAULT_CONSTR]]([[ST_TY]]* [[ST_TY_TEMP:%.+]]) @@ -220,17 +212,11 @@ // CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_PRIV]], // CHECK: [[VEC_PTR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[VEC_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_PTR_REF:%.+]], -// CHECK: br label %[[VEC_PRIV_INIT:.+]] -// CHECK: [[VEC_PRIV_INIT]] // CHECK: [[VEC_DEST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* // CHECK: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_REF]] to i8* // CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST]], i8* [[VEC_SRC]], -// CHECK: br label %[[VEC_PRIV_INIT_END:.+]] -// CHECK: [[VEC_PRIV_INIT_END]] // CHECK: [[S_ARR_REF_PTR:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 2 // CHECK: [[S_ARR_REF:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_REF_PTR]], -// CHECK: br label %[[S_ARR_PRIV_INIT:.+]] -// CHECK: [[S_ARR_PRIV_INIT]] // CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[S_ARR_END:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 @@ -242,8 +228,6 @@ // CHECK: call {{.*}} [[S_INT_TY_COPY_CONSTR:@.+]]([[S_INT_TY]]* {{.+}}, [[S_INT_TY]]* {{.+}}, [[ST_TY]]* [[ST_TY_TEMP]]) // CHECK: call {{.*}} [[ST_TY_DESTR]]([[ST_TY]]* [[ST_TY_TEMP]]) // CHECK: br i1 {{.+}}, label %{{.+}}, label %[[S_ARR_BODY]] -// CHECK: br label %[[S_ARR_PRIV_INIT_END:.+]] -// CHECK: [[S_ARR_PRIV_INIT_END]] // CHECK: [[VAR_REF_PTR:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 3 // CHECK: [[VAR_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_REF_PTR]], // CHECK: call {{.*}} [[ST_TY_DEFAULT_CONSTR]]([[ST_TY]]* [[ST_TY_TEMP:%.+]]) Index: test/OpenMP/single_codegen.cpp =================================================================== --- test/OpenMP/single_codegen.cpp +++ test/OpenMP/single_codegen.cpp @@ -23,7 +23,8 @@ // CHECK: define void [[FOO:@.+]]() TestClass tc; -#pragma omp threadprivate(tc) +TestClass tc2[2]; +#pragma omp threadprivate(tc, tc2) void foo() {} @@ -31,13 +32,15 @@ // TERM_DEBUG-LABEL: @main int main() { // CHECK-DAG: [[A_ADDR:%.+]] = alloca i8 + // CHECK-DAG: [[A2_ADDR:%.+]] = alloca [2 x i8] // CHECK-DAG: [[C_ADDR:%.+]] = alloca [[TEST_CLASS_TY]] char a; + char a2[2]; TestClass c; // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:@.+]]) // CHECK-DAG: [[DID_IT:%.+]] = alloca i32, -// CHECK-DAG: [[COPY_LIST:%.+]] = alloca [3 x i8*], +// CHECK-DAG: [[COPY_LIST:%.+]] = alloca [5 x i8*], // CHECK: store i32 0, i32* [[DID_IT]] // CHECK: [[RES:%.+]] = call i32 @__kmpc_single([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) @@ -62,21 +65,29 @@ // CHECK: call void @__kmpc_end_single([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) // CHECK-NEXT: br label {{%?}}[[EXIT]] // CHECK: [[EXIT]] -// CHECK: [[A_PTR_REF:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[COPY_LIST]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[A_PTR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[COPY_LIST]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: store i8* [[A_ADDR]], i8** [[A_PTR_REF]], -// CHECK: [[C_PTR_REF:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[COPY_LIST]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 +// CHECK: [[C_PTR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[COPY_LIST]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 // CHECK: [[C_PTR_REF_VOID_PTR:%.+]] = bitcast [[TEST_CLASS_TY]]* [[C_ADDR]] to i8* // CHECK: store i8* [[C_PTR_REF_VOID_PTR]], i8** [[C_PTR_REF]], -// CHECK: [[TC_PTR_REF:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[COPY_LIST]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 +// CHECK: [[TC_PTR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[COPY_LIST]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 // CHECK: [[TC_THREADPRIVATE_ADDR_VOID_PTR:%.+]] = call{{.*}} i8* @__kmpc_threadprivate_cached // CHECK: [[TC_THREADPRIVATE_ADDR:%.+]] = bitcast i8* [[TC_THREADPRIVATE_ADDR_VOID_PTR]] to [[TEST_CLASS_TY]]* // CHECK: [[TC_PTR_REF_VOID_PTR:%.+]] = bitcast [[TEST_CLASS_TY]]* [[TC_THREADPRIVATE_ADDR]] to i8* // CHECK: store i8* [[TC_PTR_REF_VOID_PTR]], i8** [[TC_PTR_REF]], -// CHECK: [[COPY_LIST_VOID_PTR:%.+]] = bitcast [3 x i8*]* [[COPY_LIST]] to i8* +// CHECK: [[A2_PTR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[COPY_LIST]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 +// CHECK: [[BITCAST:%.+]] = bitcast [2 x i8]* [[A2_ADDR]] to i8* +// CHECK: store i8* [[BITCAST]], i8** [[A2_PTR_REF]], +// CHECK: [[TC2_PTR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[COPY_LIST]], i{{[0-9]+}} 0, i{{[0-9]+}} 4 +// CHECK: [[TC2_THREADPRIVATE_ADDR_VOID_PTR:%.+]] = call{{.*}} i8* @__kmpc_threadprivate_cached +// CHECK: [[TC2_THREADPRIVATE_ADDR:%.+]] = bitcast i8* [[TC2_THREADPRIVATE_ADDR_VOID_PTR]] to [2 x [[TEST_CLASS_TY]]]* +// CHECK: [[TC2_PTR_REF_VOID_PTR:%.+]] = bitcast [2 x [[TEST_CLASS_TY]]]* [[TC2_THREADPRIVATE_ADDR]] to i8* +// CHECK: store i8* [[TC2_PTR_REF_VOID_PTR]], i8** [[TC2_PTR_REF]], +// CHECK: [[COPY_LIST_VOID_PTR:%.+]] = bitcast [5 x i8*]* [[COPY_LIST]] to i8* // CHECK: [[DID_IT_VAL:%.+]] = load i32, i32* [[DID_IT]], -// CHECK: call void @__kmpc_copyprivate([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 24, i8* [[COPY_LIST_VOID_PTR]], void (i8*, i8*)* [[COPY_FUNC:@.+]], i32 [[DID_IT_VAL]]) +// CHECK: call void @__kmpc_copyprivate([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 40, i8* [[COPY_LIST_VOID_PTR]], void (i8*, i8*)* [[COPY_FUNC:@.+]], i32 [[DID_IT_VAL]]) // CHECK: call{{.*}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_SINGLE_LOC]], i32 [[GTID]]) -#pragma omp single copyprivate(a, c, tc) +#pragma omp single copyprivate(a, c, tc, a2, tc2) foo(); // CHECK-NOT: call i32 @__kmpc_single // CHECK-NOT: call void @__kmpc_end_single @@ -87,29 +98,43 @@ // CHECK: store i8* %0, i8** [[DST_ADDR_REF:%.+]], // CHECK: store i8* %1, i8** [[SRC_ADDR_REF:%.+]], // CHECK: [[DST_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[DST_ADDR_REF]], -// CHECK: [[DST_ADDR:%.+]] = bitcast i8* [[DST_ADDR_VOID_PTR]] to [3 x i8*]* +// CHECK: [[DST_ADDR:%.+]] = bitcast i8* [[DST_ADDR_VOID_PTR]] to [5 x i8*]* // CHECK: [[SRC_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[SRC_ADDR_REF]], -// CHECK: [[SRC_ADDR:%.+]] = bitcast i8* [[SRC_ADDR_VOID_PTR]] to [3 x i8*]* -// CHECK: [[SRC_A_ADDR_REF:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[SRC_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: [[SRC_A_ADDR:%.+]] = load i8*, i8** [[SRC_A_ADDR_REF]], -// CHECK: [[DST_A_ADDR_REF:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DST_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[SRC_ADDR:%.+]] = bitcast i8* [[SRC_ADDR_VOID_PTR]] to [5 x i8*]* +// CHECK: [[DST_A_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DST_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[DST_A_ADDR:%.+]] = load i8*, i8** [[DST_A_ADDR_REF]], -// CHECK: [[SRC_C_ADDR_REF:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[SRC_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 -// CHECK: [[SRC_C_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[SRC_C_ADDR_REF]], -// CHECK: [[SRC_C_ADDR:%.+]] = bitcast i8* [[SRC_C_ADDR_VOID_PTR:%.+]] to [[TEST_CLASS_TY]]* -// CHECK: [[DST_C_ADDR_REF:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DST_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 -// CHECK: [[DST_C_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[DST_C_ADDR_REF]], -// CHECK: [[DST_C_ADDR:%.+]] = bitcast i8* [[DST_C_ADDR_VOID_PTR:%.+]] to [[TEST_CLASS_TY]]* -// CHECK: [[SRC_TC_ADDR_REF:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[SRC_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 -// CHECK: [[SRC_TC_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[SRC_TC_ADDR_REF]], -// CHECK: [[SRC_TC_ADDR:%.+]] = bitcast i8* [[SRC_TC_ADDR_VOID_PTR:%.+]] to [[TEST_CLASS_TY]]* -// CHECK: [[DST_TC_ADDR_REF:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DST_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 -// CHECK: [[DST_TC_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[DST_TC_ADDR_REF]], -// CHECK: [[DST_TC_ADDR:%.+]] = bitcast i8* [[DST_TC_ADDR_VOID_PTR:%.+]] to [[TEST_CLASS_TY]]* +// CHECK: [[SRC_A_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[SRC_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[SRC_A_ADDR:%.+]] = load i8*, i8** [[SRC_A_ADDR_REF]], // CHECK: [[SRC_A_VAL:%.+]] = load i8, i8* [[SRC_A_ADDR]], // CHECK: store i8 [[SRC_A_VAL]], i8* [[DST_A_ADDR]], +// CHECK: [[DST_C_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DST_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 +// CHECK: [[DST_C_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[DST_C_ADDR_REF]], +// CHECK: [[DST_C_ADDR:%.+]] = bitcast i8* [[DST_C_ADDR_VOID_PTR]] to [[TEST_CLASS_TY]]* +// CHECK: [[SRC_C_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[SRC_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 +// CHECK: [[SRC_C_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[SRC_C_ADDR_REF]], +// CHECK: [[SRC_C_ADDR:%.+]] = bitcast i8* [[SRC_C_ADDR_VOID_PTR]] to [[TEST_CLASS_TY]]* // CHECK: call{{.*}} [[TEST_CLASS_TY_ASSIGN:@.+]]([[TEST_CLASS_TY]]* [[DST_C_ADDR]], [[TEST_CLASS_TY]]* {{.*}}[[SRC_C_ADDR]]) -// CHECK: call{{.*}} [[TEST_CLASS_TY_ASSIGN:@.+]]([[TEST_CLASS_TY]]* [[DST_TC_ADDR]], [[TEST_CLASS_TY]]* {{.*}}[[SRC_TC_ADDR]]) +// CHECK: [[DST_TC_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DST_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 +// CHECK: [[DST_TC_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[DST_TC_ADDR_REF]], +// CHECK: [[DST_TC_ADDR:%.+]] = bitcast i8* [[DST_TC_ADDR_VOID_PTR]] to [[TEST_CLASS_TY]]* +// CHECK: [[SRC_TC_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[SRC_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 +// CHECK: [[SRC_TC_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[SRC_TC_ADDR_REF]], +// CHECK: [[SRC_TC_ADDR:%.+]] = bitcast i8* [[SRC_TC_ADDR_VOID_PTR]] to [[TEST_CLASS_TY]]* +// CHECK: call{{.*}} [[TEST_CLASS_TY_ASSIGN]]([[TEST_CLASS_TY]]* [[DST_TC_ADDR]], [[TEST_CLASS_TY]]* {{.*}}[[SRC_TC_ADDR]]) +// CHECK: [[DST_A2_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DST_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 +// CHECK: [[DST_A2_ADDR:%.+]] = load i8*, i8** [[DST_A2_ADDR_REF]], +// CHECK: [[SRC_A2_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[SRC_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 +// CHECK: [[SRC_A2_ADDR:%.+]] = load i8*, i8** [[SRC_A2_ADDR_REF]], +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[DST_A2_ADDR]], i8* [[SRC_A2_ADDR]], i{{[0-9]+}} 2, i{{[0-9]+}} 1, i1 false) +// CHECK: [[DST_TC2_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DST_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 4 +// CHECK: [[DST_TC2_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[DST_TC2_ADDR_REF]], +// CHECK: [[DST_TC2_ADDR:%.+]] = bitcast i8* [[DST_TC2_ADDR_VOID_PTR]] to [[TEST_CLASS_TY]]* +// CHECK: [[SRC_TC2_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[SRC_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 4 +// CHECK: [[SRC_TC2_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[SRC_TC2_ADDR_REF]], +// CHECK: [[SRC_TC2_ADDR:%.+]] = bitcast i8* [[SRC_TC2_ADDR_VOID_PTR]] to [[TEST_CLASS_TY]]* +// CHECK: br i1 +// CHECK: call{{.*}} [[TEST_CLASS_TY_ASSIGN]]([[TEST_CLASS_TY]]* %{{.+}}, [[TEST_CLASS_TY]]* {{.*}}) +// CHECK: br i1 // CHECK: ret void // CHECK-LABEL: parallel_single