diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -3560,6 +3560,74 @@ S.hasCancel()); } +Address CodeGenFunction::OMPBuilderCBHelpers::emitAddrOfVarFromArray( + CodeGenFunction &CGF, Address Array, unsigned Index, const VarDecl *Var) { + // Pull out the pointer to the variable. + Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); + llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); + + Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); + Addr = CGF.Builder.CreateElementBitCast( + Addr, CGF.ConvertTypeForMem(Var->getType())); + return Addr; +} + +llvm::Value *CodeGenFunction::OMPBuilderCBHelpers::emitCopyprivateCopyFunction( + CodeGenModule &CGM, llvm::Type *ArgsType, + ArrayRef CopyprivateVars, ArrayRef DestExprs, + ArrayRef SrcExprs, ArrayRef AssignmentOps, + SourceLocation Loc) { + ASTContext &C = CGM.getContext(); + // void copy_func(void *LHSArg, void *RHSArg); + FunctionArgList Args; + ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, + ImplicitParamDecl::Other); + ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, + ImplicitParamDecl::Other); + Args.push_back(&LHSArg); + Args.push_back(&RHSArg); + const auto &CGFI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + std::string Name = + CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); + auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), + llvm::GlobalValue::InternalLinkage, Name, + &CGM.getModule()); + CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); + Fn->setDoesNotRecurse(); + CodeGenFunction CGF(CGM); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); + // Dest = (void*[n])(LHSArg); + // Src = (void*[n])(RHSArg); + Address LHS( + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), ArgsType), + CGF.getPointerAlign()); + Address RHS( + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), ArgsType), + CGF.getPointerAlign()); + // *(Type0*)Dst[0] = *(Type0*)Src[0]; + // *(Type1*)Dst[1] = *(Type1*)Src[1]; + // ... + // *(Typen*)Dst[n] = *(Typen*)Src[n]; + for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { + const auto *DestVar = + cast(cast(DestExprs[I])->getDecl()); + Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); + + const auto *SrcVar = + cast(cast(SrcExprs[I])->getDecl()); + Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); + + const auto *VD = cast(CopyprivateVars[I])->getDecl(); + QualType Type = VD->getType(); + CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); + } + CGF.FinishFunction(); + return Fn; +} + void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { llvm::SmallVector CopyprivateVars; llvm::SmallVector DestExprs; @@ -3577,6 +3645,93 @@ AssignmentOps.append(C->assignment_ops().begin(), C->assignment_ops().end()); } + + if (CGM.getLangOpts().OpenMPIRBuilder) { + llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); + using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; + + ASTContext &Ctx = CGM.getContext(); + const CapturedStmt *CS = S.getInnermostCapturedStmt(); + const Stmt *SingleRegionBodyStmt = CS->getCapturedStmt(); + + bool HasCopyPrivate = !CopyprivateVars.empty(); + Address DidIt = Address::invalid(); + llvm::Value *DidItPtr = nullptr; + if (HasCopyPrivate) { + QualType KmpInt32Ty = CGM.getContext().getIntTypeForBitwidth( + /*DestWidth=*/32, /*Signed=*/1); + DidIt = CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); + DidItPtr = DidIt.getPointer(); + } + + auto FiniCB = [this](InsertPointTy IP) { + OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); + }; + + auto BodyGenCB = [SingleRegionBodyStmt, &DidIt, + this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + llvm::BasicBlock &FiniBB) { + // todo: Add support for first/lastprivate privatization + + OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB); + OMPBuilderCBHelpers::EmitOMPRegionBody(*this, SingleRegionBodyStmt, + CodeGenIP, FiniBB); + if (DidIt.isValid()) { + // did_it = 1; + auto term = this->Builder.GetInsertBlock()->getTerminator(); + auto DidItMasterStore = + this->Builder.CreateStore(this->Builder.getInt32(1), DidIt); + DidItMasterStore->moveBefore(term); + } + }; + + CGCapturedStmtInfo CGSI(*CS, CR_OpenMP); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); + Builder.restoreIP( + OMPBuilder.CreateSingle(Builder, BodyGenCB, FiniCB, DidItPtr)); + + if (DidIt.isValid()) { + // Create a list of all private variables for copyprivate. + llvm::APInt ArraySize(/*unsigned int numBits=*/32, + CopyprivateVars.size()); + QualType CopyprivateArrayTy = Ctx.getConstantArrayType( + Ctx.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, + /*IndexTypeQuals=*/0); + Address CopyprivateList = + CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); + for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { + Address Elem = Builder.CreateConstArrayGEP(CopyprivateList, I); + Builder.CreateStore( + Builder.CreatePointerBitCastOrAddrSpaceCast( + EmitLValue(CopyprivateVars[I]).getPointer(*this), VoidPtrTy), + Elem); + } + // Build function that copies private values from single region to all + // other threads in the corresponding parallel region. + std::string CopyFuncName = OMPBuilderCBHelpers::getNameWithSeparators( + {"omp", "copyprivate", "copy_func"}); + llvm::Value *CpyFn = OMPBuilderCBHelpers::emitCopyprivateCopyFunction( + CGM, ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), + CopyprivateVars, SrcExprs, DestExprs, AssignmentOps, S.getBeginLoc()); + + llvm::Value *ListSize = getTypeSize(CopyprivateArrayTy); + Address CL = Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, + VoidPtrTy); + + OMPBuilder.CreateCopyPrivate(Builder, ListSize, CL.getPointer(), CpyFn, + DidItPtr); + } + + // Emit an implicit barrier at the end (to avoid data race on firstprivate + // init or if no 'nowait' clause was specified and no 'copyprivate' clause). + if (!S.getSingleClause() && CopyprivateVars.empty()) { + CGM.getOpenMPRuntime().emitBarrierCall( + *this, S.getBeginLoc(), + S.getSingleClause() ? OMPD_unknown : OMPD_single); + } + return; + } + // Emit code for 'single' region along with 'copyprivate' clauses auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { Action.Enter(CGF); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -1640,6 +1640,28 @@ const VarDecl *VD, Address VDAddr, SourceLocation Loc); + /// Generate the copy function used as callback in __kmpc_copyprivate , and + /// return a pointer to it + /// + /// \param CGM CG module where the function is created + /// \param ArgsType pointer type to copy private list + /// \param CopyprivateVars list of copyprivate clause variables + /// \param DstExprs list of destination expressions + /// \param SrcExprs list of source expressions + /// \param AssignmentOps list of assignment expression + /// \param Loc The location where the barrier directive was encountered + /// + static llvm::Value *emitCopyprivateCopyFunction( + CodeGenModule &CGM, llvm::Type *ArgsType, + ArrayRef CopyprivateVars, ArrayRef DstExprs, + ArrayRef SrcExprs, ArrayRef AssignmentOps, + SourceLocation Loc); + + /// Given an array of pointers to variables, project the address of a + /// given variable. + static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, + unsigned Index, const VarDecl *Var); + /// Gets the OpenMP-specific address of the local variable /p VD. static Address getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD); diff --git a/clang/test/OpenMP/single_codegen.cpp b/clang/test/OpenMP/single_codegen.cpp --- a/clang/test/OpenMP/single_codegen.cpp +++ b/clang/test/OpenMP/single_codegen.cpp @@ -1,11 +1,12 @@ -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -fnoopenmp-use-tls -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=OMP50,CHECK -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -fnoopenmp-use-tls -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -fopenmp-version=45 -o - | FileCheck %s --check-prefixes=OMP45,CHECK +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -fnoopenmp-use-tls -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=OMP50,ALL,CHECK +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -fnoopenmp-use-tls -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -fopenmp-version=45 -o - | FileCheck %s --check-prefixes=OMP45,ALL,CHECK +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -fnoopenmp-use-tls -x c++ -std=c++11 -DOMPBUILDER -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=ALL,OMPBUILDER // RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -fnoopenmp-use-tls -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -fnoopenmp-use-tls -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=OMP50,CHECK +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -fnoopenmp-use-tls -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=OMP50,ALL,CHECK // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fnoopenmp-use-tls -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fnoopenmp-use-tls -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=OMP45,CHECK +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fnoopenmp-use-tls -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=OMP45,ALL,CHECK // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -std=c++11 -fopenmp -fnoopenmp-use-tls -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=TERM_DEBUG // RUN: %clang_cc1 -verify -fopenmp -fnoopenmp-use-tls -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=ARRAY %s @@ -30,13 +31,13 @@ ~TestClass(){}; }; -// CHECK-DAG: [[TEST_CLASS_TY:%.+]] = type { i{{[0-9]+}} } +// ALL-DAG: [[TEST_CLASS_TY:%.+]] = type { i{{[0-9]+}} } // CHECK-DAG: [[SST_TY:%.+]] = type { double } // CHECK-DAG: [[SS_TY:%.+]] = type { i32, i8, i32* } -// CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } -// CHECK: [[IMPLICIT_BARRIER_SINGLE_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 322, i32 0, i32 0, i8* +// ALL-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } +// ALL: [[IMPLICIT_BARRIER_SINGLE_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 322, i32 0, i32 0, i8* -// CHECK: define void [[FOO:@.+]]() +// ALL: define void [[FOO:@.+]]() TestClass tc; TestClass tc2[2]; @@ -44,6 +45,7 @@ void foo() { extern void mayThrow(); mayThrow(); } +#ifndef OMPBUILDER struct SS { int a; int b : 4; @@ -76,129 +78,132 @@ }(); } }; +#endif -// CHECK-LABEL: @main +// ALL-LABEL: @main // TERM_DEBUG-LABEL: @main int main() { - // CHECK: alloca i32 - // CHECK-DAG: [[A_ADDR:%.+]] = alloca i8 - // CHECK-DAG: [[A2_ADDR:%.+]] = alloca [2 x i8] - // CHECK-DAG: [[C_ADDR:%.+]] = alloca [[TEST_CLASS_TY]] - // CHECK-DAG: [[DID_IT:%.+]] = alloca i32, - // CHECK-DAG: [[COPY_LIST:%.+]] = alloca [5 x i8*], + // ALL: alloca i32 + // ALL-DAG: [[A_ADDR:%.+]] = alloca i8 + // ALL-DAG: [[A2_ADDR:%.+]] = alloca [2 x i8] + // ALL-DAG: [[C_ADDR:%.+]] = alloca [[TEST_CLASS_TY]] + // ALL-DAG: [[DID_IT:%.+]] = alloca i32, + // ALL-DAG: [[COPY_LIST:%.+]] = alloca [5 x i8*], char a; char a2[2]; TestClass &c = tc; + +#ifndef OMPBUILDER SST sst; SS ss(c.a); +#endif -// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:@.+]]) -// CHECK: [[RES:%.+]] = call i32 @__kmpc_single([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) -// CHECK-NEXT: [[IS_SINGLE:%.+]] = icmp ne i32 [[RES]], 0 -// CHECK-NEXT: br i1 [[IS_SINGLE]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]] -// CHECK: [[THEN]] -// CHECK-NEXT: store i8 2, i8* [[A_ADDR]] -// CHECK-NEXT: call void @__kmpc_end_single([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) -// CHECK-NEXT: br label {{%?}}[[EXIT]] -// CHECK: [[EXIT]] -// CHECK-NOT: call {{.+}} @__kmpc_cancel_barrier +// ALL: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:@.+]]) +// ALL: [[RES:%.+]] = call i32 @__kmpc_single([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) +// ALL-NEXT: [[IS_SINGLE:%.+]] = icmp ne i32 [[RES]], 0 +// ALL-NEXT: br i1 [[IS_SINGLE]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]] +// ALL: [[THEN]] +// ALL-NEXT: store i8 2, i8* [[A_ADDR]] +// ALL-NEXT: call void @__kmpc_end_single([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) +// ALL-NEXT: br label {{%?}}[[EXIT]] +// ALL: [[EXIT]] +// ALL-NOT: call {{.+}} @__kmpc_cancel_barrier #pragma omp single nowait a = 2; -// CHECK: [[RES:%.+]] = call i32 @__kmpc_single([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) -// CHECK-NEXT: [[IS_SINGLE:%.+]] = icmp ne i32 [[RES]], 0 -// CHECK-NEXT: br i1 [[IS_SINGLE]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]] -// CHECK: [[THEN]] -// CHECK-NEXT: store i8 2, i8* [[A_ADDR]] -// CHECK-NEXT: call void @__kmpc_end_single([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) -// CHECK-NEXT: br label {{%?}}[[EXIT]] -// CHECK: [[EXIT]] -// CHECK: call{{.*}} @__kmpc_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_SINGLE_LOC]], i32 [[GTID]]) +// ALL: [[RES:%.+]] = call i32 @__kmpc_single([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) +// ALL-NEXT: [[IS_SINGLE:%.+]] = icmp ne i32 [[RES]], 0 +// ALL-NEXT: br i1 [[IS_SINGLE]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]] +// ALL: [[THEN]] +// ALL-NEXT: store i8 2, i8* [[A_ADDR]] +// ALL-NEXT: call void @__kmpc_end_single([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) +// ALL-NEXT: br label {{%?}}[[EXIT]] +// ALL: [[EXIT]] +// ALL: call{{.*}} @__kmpc_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_SINGLE_LOC]], i32 [[GTID]]) #pragma omp single a = 2; -// CHECK: store i32 0, i32* [[DID_IT]] -// CHECK: [[RES:%.+]] = call i32 @__kmpc_single([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) -// CHECK-NEXT: [[IS_SINGLE:%.+]] = icmp ne i32 [[RES]], 0 -// CHECK-NEXT: br i1 [[IS_SINGLE]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]] -// CHECK: [[THEN]] -// CHECK-NEXT: invoke void [[FOO]]() -// CHECK: to label {{%?}}[[CONT:.+]] unwind -// CHECK: [[CONT]] -// CHECK: call void @__kmpc_end_single([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) -// CHECK: store i32 1, i32* [[DID_IT]] -// CHECK-NEXT: br label {{%?}}[[EXIT]] -// CHECK: [[EXIT]] -// CHECK: [[A_PTR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[COPY_LIST]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: store i8* [[A_ADDR]], i8** [[A_PTR_REF]], -// CHECK: [[C_PTR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[COPY_LIST]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 -// CHECK: store i8* {{.+}}, i8** [[C_PTR_REF]], -// CHECK: [[TC_PTR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[COPY_LIST]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 -// CHECK: [[TC_THREADPRIVATE_ADDR_VOID_PTR:%.+]] = call{{.*}} i8* @__kmpc_threadprivate_cached -// CHECK: [[TC_THREADPRIVATE_ADDR:%.+]] = bitcast i8* [[TC_THREADPRIVATE_ADDR_VOID_PTR]] to [[TEST_CLASS_TY]]* -// CHECK: [[TC_PTR_REF_VOID_PTR:%.+]] = bitcast [[TEST_CLASS_TY]]* [[TC_THREADPRIVATE_ADDR]] to i8* -// CHECK: store i8* [[TC_PTR_REF_VOID_PTR]], i8** [[TC_PTR_REF]], -// CHECK: [[A2_PTR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[COPY_LIST]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 -// CHECK: [[BITCAST:%.+]] = bitcast [2 x i8]* [[A2_ADDR]] to i8* -// CHECK: store i8* [[BITCAST]], i8** [[A2_PTR_REF]], -// CHECK: [[TC2_PTR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[COPY_LIST]], i{{[0-9]+}} 0, i{{[0-9]+}} 4 -// CHECK: [[TC2_THREADPRIVATE_ADDR_VOID_PTR:%.+]] = call{{.*}} i8* @__kmpc_threadprivate_cached -// CHECK: [[TC2_THREADPRIVATE_ADDR:%.+]] = bitcast i8* [[TC2_THREADPRIVATE_ADDR_VOID_PTR]] to [2 x [[TEST_CLASS_TY]]]* -// CHECK: [[TC2_PTR_REF_VOID_PTR:%.+]] = bitcast [2 x [[TEST_CLASS_TY]]]* [[TC2_THREADPRIVATE_ADDR]] to i8* -// CHECK: store i8* [[TC2_PTR_REF_VOID_PTR]], i8** [[TC2_PTR_REF]], -// CHECK: [[COPY_LIST_VOID_PTR:%.+]] = bitcast [5 x i8*]* [[COPY_LIST]] to i8* -// CHECK: [[DID_IT_VAL:%.+]] = load i32, i32* [[DID_IT]], -// CHECK: call void @__kmpc_copyprivate([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i64 40, i8* [[COPY_LIST_VOID_PTR]], void (i8*, i8*)* [[COPY_FUNC:@.+]], i32 [[DID_IT_VAL]]) -// CHECK-NOT: call {{.+}} @__kmpc_cancel_barrier +// ALL: store i32 0, i32* [[DID_IT]] +// ALL: [[RES:%.+]] = call i32 @__kmpc_single([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) +// ALL-NEXT: [[IS_SINGLE:%.+]] = icmp ne i32 [[RES]], 0 +// ALL-NEXT: br i1 [[IS_SINGLE]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]] +// ALL: [[THEN]] +// ALL-NEXT: invoke void [[FOO]]() +// ALL: to label {{%?}}[[CONT:.+]] unwind +// ALL: [[CONT]] +// ALL: call void @__kmpc_end_single([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) +// ALL: store i32 1, i32* [[DID_IT]] +// ALL-NEXT: br label {{%?}}[[EXIT]] +// ALL: [[EXIT]] +// ALL: [[A_PTR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[COPY_LIST]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// ALL: store i8* [[A_ADDR]], i8** [[A_PTR_REF]], +// ALL: [[C_PTR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[COPY_LIST]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 +// ALL: store i8* {{.+}}, i8** [[C_PTR_REF]], +// ALL: [[TC_PTR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[COPY_LIST]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 +// ALL: [[TC_THREADPRIVATE_ADDR_VOID_PTR:%.+]] = call{{.*}} i8* @__kmpc_threadprivate_cached +// ALL: [[TC_THREADPRIVATE_ADDR:%.+]] = bitcast i8* [[TC_THREADPRIVATE_ADDR_VOID_PTR]] to [[TEST_CLASS_TY]]* +// ALL: [[TC_PTR_REF_VOID_PTR:%.+]] = bitcast [[TEST_CLASS_TY]]* [[TC_THREADPRIVATE_ADDR]] to i8* +// ALL: store i8* [[TC_PTR_REF_VOID_PTR]], i8** [[TC_PTR_REF]], +// ALL: [[A2_PTR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[COPY_LIST]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 +// ALL: [[BITCAST:%.+]] = bitcast [2 x i8]* [[A2_ADDR]] to i8* +// ALL: store i8* [[BITCAST]], i8** [[A2_PTR_REF]], +// ALL: [[TC2_PTR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[COPY_LIST]], i{{[0-9]+}} 0, i{{[0-9]+}} 4 +// ALL: [[TC2_THREADPRIVATE_ADDR_VOID_PTR:%.+]] = call{{.*}} i8* @__kmpc_threadprivate_cached +// ALL: [[TC2_THREADPRIVATE_ADDR:%.+]] = bitcast i8* [[TC2_THREADPRIVATE_ADDR_VOID_PTR]] to [2 x [[TEST_CLASS_TY]]]* +// ALL: [[TC2_PTR_REF_VOID_PTR:%.+]] = bitcast [2 x [[TEST_CLASS_TY]]]* [[TC2_THREADPRIVATE_ADDR]] to i8* +// ALL: store i8* [[TC2_PTR_REF_VOID_PTR]], i8** [[TC2_PTR_REF]], +// ALL: [[COPY_LIST_VOID_PTR:%.+]] = bitcast [5 x i8*]* [[COPY_LIST]] to i8* +// ALL: [[DID_IT_VAL:%.+]] = load i32, i32* [[DID_IT]], +// ALL: call void @__kmpc_copyprivate([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i64 40, i8* [[COPY_LIST_VOID_PTR]], void (i8*, i8*)* [[COPY_FUNC:@.+]], i32 [[DID_IT_VAL]]) +// ALL-NOT: call {{.+}} @__kmpc_cancel_barrier #pragma omp single copyprivate(a, c, tc, a2, tc2) foo(); -// CHECK-NOT: call i32 @__kmpc_single -// CHECK-NOT: call void @__kmpc_end_single + // ALL-NOT: call i32 @__kmpc_single + // ALL-NOT: call void @__kmpc_end_single return a; } // OMP50-LABEL: declare i8* @__kmpc_threadprivate_cached( -// CHECK: void [[COPY_FUNC]](i8* %0, i8* %1) -// CHECK: store i8* %0, i8** [[DST_ADDR_REF:%.+]], -// CHECK: store i8* %1, i8** [[SRC_ADDR_REF:%.+]], -// CHECK: [[DST_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[DST_ADDR_REF]], -// CHECK: [[DST_ADDR:%.+]] = bitcast i8* [[DST_ADDR_VOID_PTR]] to [5 x i8*]* -// CHECK: [[SRC_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[SRC_ADDR_REF]], -// CHECK: [[SRC_ADDR:%.+]] = bitcast i8* [[SRC_ADDR_VOID_PTR]] to [5 x i8*]* -// CHECK: [[DST_A_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DST_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: [[DST_A_ADDR:%.+]] = load i8*, i8** [[DST_A_ADDR_REF]], -// CHECK: [[SRC_A_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[SRC_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: [[SRC_A_ADDR:%.+]] = load i8*, i8** [[SRC_A_ADDR_REF]], -// CHECK: [[SRC_A_VAL:%.+]] = load i8, i8* [[SRC_A_ADDR]], -// CHECK: store i8 [[SRC_A_VAL]], i8* [[DST_A_ADDR]], -// CHECK: [[DST_C_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DST_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 -// CHECK: [[DST_C_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[DST_C_ADDR_REF]], -// CHECK: [[DST_C_ADDR:%.+]] = bitcast i8* [[DST_C_ADDR_VOID_PTR]] to [[TEST_CLASS_TY]]* -// CHECK: [[SRC_C_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[SRC_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 -// CHECK: [[SRC_C_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[SRC_C_ADDR_REF]], -// CHECK: [[SRC_C_ADDR:%.+]] = bitcast i8* [[SRC_C_ADDR_VOID_PTR]] to [[TEST_CLASS_TY]]* -// CHECK: call{{.*}} [[TEST_CLASS_TY_ASSIGN:@.+]]([[TEST_CLASS_TY]]* [[DST_C_ADDR]], [[TEST_CLASS_TY]]* {{.*}}[[SRC_C_ADDR]]) -// CHECK: [[DST_TC_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DST_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 -// CHECK: [[DST_TC_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[DST_TC_ADDR_REF]], -// CHECK: [[DST_TC_ADDR:%.+]] = bitcast i8* [[DST_TC_ADDR_VOID_PTR]] to [[TEST_CLASS_TY]]* -// CHECK: [[SRC_TC_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[SRC_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 -// CHECK: [[SRC_TC_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[SRC_TC_ADDR_REF]], -// CHECK: [[SRC_TC_ADDR:%.+]] = bitcast i8* [[SRC_TC_ADDR_VOID_PTR]] to [[TEST_CLASS_TY]]* -// CHECK: call{{.*}} [[TEST_CLASS_TY_ASSIGN]]([[TEST_CLASS_TY]]* [[DST_TC_ADDR]], [[TEST_CLASS_TY]]* {{.*}}[[SRC_TC_ADDR]]) -// CHECK: [[DST_A2_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DST_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 -// CHECK: [[DST_A2_ADDR:%.+]] = load i8*, i8** [[DST_A2_ADDR_REF]], -// CHECK: [[SRC_A2_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[SRC_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 -// CHECK: [[SRC_A2_ADDR:%.+]] = load i8*, i8** [[SRC_A2_ADDR_REF]], -// CHECK: call void @llvm.memcpy.{{.+}}(i8* align 1 [[DST_A2_ADDR]], i8* align 1 [[SRC_A2_ADDR]], i{{[0-9]+}} 2, i1 false) -// CHECK: [[DST_TC2_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DST_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 4 -// CHECK: [[DST_TC2_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[DST_TC2_ADDR_REF]], -// CHECK: [[DST_TC2_ADDR:%.+]] = bitcast i8* [[DST_TC2_ADDR_VOID_PTR]] to [[TEST_CLASS_TY]]* -// CHECK: [[SRC_TC2_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[SRC_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 4 -// CHECK: [[SRC_TC2_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[SRC_TC2_ADDR_REF]], -// CHECK: [[SRC_TC2_ADDR:%.+]] = bitcast i8* [[SRC_TC2_ADDR_VOID_PTR]] to [[TEST_CLASS_TY]]* -// CHECK: br i1 -// CHECK: call{{.*}} [[TEST_CLASS_TY_ASSIGN]]([[TEST_CLASS_TY]]* %{{.+}}, [[TEST_CLASS_TY]]* {{.*}}) -// CHECK: br i1 -// CHECK: ret void - +// ALL: void [[COPY_FUNC]](i8* %0, i8* %1) +// ALL: store i8* %0, i8** [[DST_ADDR_REF:%.+]], +// ALL: store i8* %1, i8** [[SRC_ADDR_REF:%.+]], +// ALL: [[DST_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[DST_ADDR_REF]], +// ALL: [[DST_ADDR:%.+]] = bitcast i8* [[DST_ADDR_VOID_PTR]] to [5 x i8*]* +// ALL: [[SRC_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[SRC_ADDR_REF]], +// ALL: [[SRC_ADDR:%.+]] = bitcast i8* [[SRC_ADDR_VOID_PTR]] to [5 x i8*]* +// ALL: [[DST_A_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DST_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// ALL: [[DST_A_ADDR:%.+]] = load i8*, i8** [[DST_A_ADDR_REF]], +// ALL: [[SRC_A_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[SRC_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// ALL: [[SRC_A_ADDR:%.+]] = load i8*, i8** [[SRC_A_ADDR_REF]], +// ALL: [[SRC_A_VAL:%.+]] = load i8, i8* [[SRC_A_ADDR]], +// ALL: store i8 [[SRC_A_VAL]], i8* [[DST_A_ADDR]], +// ALL: [[DST_C_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DST_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 +// ALL: [[DST_C_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[DST_C_ADDR_REF]], +// ALL: [[DST_C_ADDR:%.+]] = bitcast i8* [[DST_C_ADDR_VOID_PTR]] to [[TEST_CLASS_TY]]* +// ALL: [[SRC_C_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[SRC_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 +// ALL: [[SRC_C_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[SRC_C_ADDR_REF]], +// ALL: [[SRC_C_ADDR:%.+]] = bitcast i8* [[SRC_C_ADDR_VOID_PTR]] to [[TEST_CLASS_TY]]* +// ALL: call{{.*}} [[TEST_CLASS_TY_ASSIGN:@.+]]([[TEST_CLASS_TY]]* [[DST_C_ADDR]], [[TEST_CLASS_TY]]* {{.*}}[[SRC_C_ADDR]]) +// ALL: [[DST_TC_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DST_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 +// ALL: [[DST_TC_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[DST_TC_ADDR_REF]], +// ALL: [[DST_TC_ADDR:%.+]] = bitcast i8* [[DST_TC_ADDR_VOID_PTR]] to [[TEST_CLASS_TY]]* +// ALL: [[SRC_TC_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[SRC_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 +// ALL: [[SRC_TC_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[SRC_TC_ADDR_REF]], +// ALL: [[SRC_TC_ADDR:%.+]] = bitcast i8* [[SRC_TC_ADDR_VOID_PTR]] to [[TEST_CLASS_TY]]* +// ALL: call{{.*}} [[TEST_CLASS_TY_ASSIGN]]([[TEST_CLASS_TY]]* [[DST_TC_ADDR]], [[TEST_CLASS_TY]]* {{.*}}[[SRC_TC_ADDR]]) +// ALL: [[DST_A2_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DST_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 +// ALL: [[DST_A2_ADDR:%.+]] = load i8*, i8** [[DST_A2_ADDR_REF]], +// ALL: [[SRC_A2_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[SRC_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 +// ALL: [[SRC_A2_ADDR:%.+]] = load i8*, i8** [[SRC_A2_ADDR_REF]], +// ALL: call void @llvm.memcpy.{{.+}}(i8* align 1 [[DST_A2_ADDR]], i8* align 1 [[SRC_A2_ADDR]], i{{[0-9]+}} 2, i1 false) +// ALL: [[DST_TC2_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DST_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 4 +// ALL: [[DST_TC2_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[DST_TC2_ADDR_REF]], +// ALL: [[DST_TC2_ADDR:%.+]] = bitcast i8* [[DST_TC2_ADDR_VOID_PTR]] to [[TEST_CLASS_TY]]* +// ALL: [[SRC_TC2_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[SRC_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 4 +// ALL: [[SRC_TC2_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[SRC_TC2_ADDR_REF]], +// ALL: [[SRC_TC2_ADDR:%.+]] = bitcast i8* [[SRC_TC2_ADDR_VOID_PTR]] to [[TEST_CLASS_TY]]* +// ALL: br i1 +// ALL: call{{.*}} [[TEST_CLASS_TY_ASSIGN]]([[TEST_CLASS_TY]]* %{{.+}}, [[TEST_CLASS_TY]]* {{.*}}) +// ALL: br i1 +// ALL: ret void // OMP50-LABEL: void @_ZN3SSTIdEC2Ev( // OMP50: getelementptr inbounds [[SST_TY]], [[SST_TY]]* %{{.+}}, i32 0, i32 0