diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -1552,6 +1552,92 @@ const OMPExecutableDirective &, llvm::SmallVectorImpl &) {} +bool CodeGenFunction::OMPBuilderCBHelpers::EmitOMPCopyinClause( + CodeGenFunction &CGF, const OMPExecutableDirective &D, + InsertPointTy AllocaIP) { + if (!CGF.HaveInsertPoint()) + return false; + // threadprivate_var1 = master_threadprivate_var1; + // operator=(threadprivate_var2, master_threadprivate_var2); + // ... + // __kmpc_barrier(&loc, global_tid); + llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder(); + llvm::DenseSet CopiedVars; + llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr; + for (const auto *C : D.template getClausesOfKind()) { + auto IRef = C->varlist_begin(); + auto ISrcRef = C->source_exprs().begin(); + auto IDestRef = C->destination_exprs().begin(); + for (const Expr *AssignOp : C->assignment_ops()) { + const auto *VD = cast(cast(*IRef)->getDecl()); + QualType Type = VD->getType(); + if (CopiedVars.insert(VD->getCanonicalDecl()).second) { + // Get the address of the master variable. If we are emitting code with + // TLS support, the address is passed from the master as field in the + // captured declaration. + Address MasterAddr = Address::invalid(); + if (CGF.getLangOpts().OpenMPUseTLS && + CGF.getContext().getTargetInfo().isTLSSupported()) { + assert(CGF.CapturedStmtInfo->lookup(VD) && + "Copyin threadprivates should have been captured!"); + const auto *VDCanon = VD->getCanonicalDecl(); + auto I = CGF.LocalDeclMap.find(VDCanon); + if (I == CGF.LocalDeclMap.end()) { + Address Addr(CGF.CGM.GetAddrOfGlobal(VDCanon), + CGF.getContext().getDeclAlign(VDCanon)); + CGF.LocalDeclMap.try_emplace(VDCanon, Addr); + } + DeclRefExpr DRE(CGF.getContext(), const_cast(VD), true, + (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); + MasterAddr = CGF.EmitLValue(&DRE).getAddress(CGF); + CGF.LocalDeclMap.erase(VDCanon); + } else { + MasterAddr = Address(VD->isStaticLocal() + ? CGF.CGM.getStaticLocalDeclAddress(VD) + : CGF.CGM.GetAddrOfGlobal(VD), + CGF.getContext().getDeclAlign(VD)); + } + + // Get the address of the threadprivate variable. + Address PrivateAddr = CGF.EmitLValue(*IRef).getAddress(CGF); + if (CopiedVars.size() == 1) { + // At first check if current thread is a master thread. If it is, no + // need to copy data. + InsertPointTy CopyBeginIP = OMPBuilder->CreateCopyinClauseBlocks( + AllocaIP, MasterAddr.getPointer(), PrivateAddr.getPointer(), + CGF.IntPtrTy, /*BranchtoEnd*/ false); + CGF.Builder.restoreIP(CopyBeginIP); + CopyBegin = CopyBeginIP.getBlock(); + assert(CopyBegin && "CopyIn Basic Block was not generated!"); + llvm::BranchInst *EntryCBI = llvm::dyn_cast_or_null( + AllocaIP.getBlock()->getTerminator()); + CopyEnd = EntryCBI ? EntryCBI->getSuccessor(1) : nullptr; + assert(CopyEnd && "No unique successor for CopyIn Basic Block!"); + } + const auto *SrcVD = + cast(cast(*ISrcRef)->getDecl()); + const auto *DestVD = + cast(cast(*IDestRef)->getDecl()); + CGF.EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp); + } + ++IRef; + ++ISrcRef; + ++IDestRef; + } + } + if (CopyEnd) { + // Exit out of copying procedure for non-master thread. + CGF.EmitBranch(CopyEnd); + if (llvm::Instruction *CopyEndTI = + CopyEnd ? CopyEnd->getTerminator() : nullptr) + CGF.Builder.SetInsertPoint(CopyEndTI); + else + CGF.Builder.SetInsertPoint(CopyEnd); + return true; + } + return false; +} + Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable( CodeGenFunction &CGF, const VarDecl *VD) { CodeGenModule &CGM = CGF.CGM; diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -1598,6 +1598,9 @@ /// Gets the OpenMP-specific address of the local variable /p VD. static Address getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD); + static bool EmitOMPCopyinClause(CodeGenFunction &CGF, + const OMPExecutableDirective &D, + InsertPointTy AllocaIP); /// Get the platform-specific name separator. /// \param Parts different parts of the final name that needs separation /// \param FirstSeparator First separator used between the initial two diff --git a/clang/test/OpenMP/parallel_copyin_codegen.cpp b/clang/test/OpenMP/parallel_copyin_codegen.cpp --- a/clang/test/OpenMP/parallel_copyin_codegen.cpp +++ b/clang/test/OpenMP/parallel_copyin_codegen.cpp @@ -1,9 +1,12 @@ -// RUN: %clang_cc1 -verify -fopenmp -fnoopenmp-use-tls -x c++ -triple x86_64-linux -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fnoopenmp-use-tls -x c++ -triple x86_64-linux -emit-llvm %s -o - | FileCheck %s -check-prefixes=ALL,CHECK // RUN: %clang_cc1 -fopenmp -fnoopenmp-use-tls -x c++ -std=c++11 -triple x86_64-linux -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fnoopenmp-use-tls -x c++ -triple x86_64-linux -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fnoopenmp-use-tls -x c++ -triple x86_64-linux -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefixes=ALL,CHECK // RUN: %clang_cc1 -verify -fopenmp -fnoopenmp-use-tls -x c++ -std=c++11 -DLAMBDA -triple x86_64-linux -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s // RUN: %clang_cc1 -verify -fopenmp -fnoopenmp-use-tls -x c++ -fblocks -DBLOCKS -triple x86_64-linux -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s // RUN: %clang_cc1 -verify -fopenmp -fnoopenmp-use-tls -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=ARRAY %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -fnoopenmp-use-tls -x c++ -triple x86_64-linux -emit-llvm %s -o - | FileCheck %s -check-prefixes=ALL,IRBUILDER +// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -fnoopenmp-use-tls -x c++ -std=c++11 -triple x86_64-linux -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -fnoopenmp-use-tls -x c++ -triple x86_64-linux -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefixes=ALL,IRBUILDER // RUN: %clang_cc1 -verify -fopenmp-simd -fnoopenmp-use-tls -x c++ -triple x86_64-linux -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -fopenmp-simd -fnoopenmp-use-tls -x c++ -std=c++11 -triple x86_64-linux -emit-pch -o %t %s @@ -46,21 +49,22 @@ ~S() {} }; -// CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float } -// CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } +// ALL-DAG: [[S_FLOAT_TY:%.+]] = type { float } +// ALL-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } // CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8* +// IRBUILDER-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 34, i32 0, i32 0, i8* // TLS-CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float } // TLS-CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } // TLS-CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8* -// CHECK-DAG: [[T_VAR:@.+]] = internal global i{{[0-9]+}} 1122, -// CHECK-DAG: [[VEC:@.+]] = internal global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2], -// CHECK-DAG: [[S_ARR:@.+]] = internal global [2 x [[S_FLOAT_TY]]] zeroinitializer, -// CHECK-DAG: [[VAR:@.+]] = internal global [[S_FLOAT_TY]] zeroinitializer, -// CHECK-DAG: [[TMAIN_T_VAR:@.+]] = linkonce_odr {{(dso_local )?}}global i{{[0-9]+}} 333, -// CHECK-DAG: [[TMAIN_VEC:@.+]] = linkonce_odr {{(dso_local )?}}global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 3, i{{[0-9]+}} 3], -// CHECK-DAG: [[TMAIN_S_ARR:@.+]] = linkonce_odr {{(dso_local )?}}global [2 x [[S_INT_TY]]] zeroinitializer, -// CHECK-DAG: [[TMAIN_VAR:@.+]] = linkonce_odr {{(dso_local )?}}global [[S_INT_TY]] zeroinitializer, +// ALL-DAG: [[T_VAR:@.+]] = internal global i{{[0-9]+}} 1122, +// ALL-DAG: [[VEC:@.+]] = internal global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2], +// ALL-DAG: [[S_ARR:@.+]] = internal global [2 x [[S_FLOAT_TY]]] zeroinitializer, +// ALL-DAG: [[VAR:@.+]] = internal global [[S_FLOAT_TY]] zeroinitializer, +// ALL-DAG: [[TMAIN_T_VAR:@.+]] = linkonce_odr {{(dso_local )?}}global i{{[0-9]+}} 333, +// ALL-DAG: [[TMAIN_VEC:@.+]] = linkonce_odr {{(dso_local )?}}global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 3, i{{[0-9]+}} 3], +// ALL-DAG: [[TMAIN_S_ARR:@.+]] = linkonce_odr {{(dso_local )?}}global [2 x [[S_INT_TY]]] zeroinitializer, +// ALL-DAG: [[TMAIN_VAR:@.+]] = linkonce_odr {{(dso_local )?}}global [[S_INT_TY]] zeroinitializer, // TLS-CHECK-DAG: [[T_VAR:@.+]] = internal thread_local global i{{[0-9]+}} 1122, // TLS-CHECK-DAG: [[VEC:@.+]] = internal thread_local global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2], // TLS-CHECK-DAG: [[S_ARR:@.+]] = internal thread_local global [2 x [[S_FLOAT_TY]]] zeroinitializer, @@ -230,14 +234,14 @@ #endif } -// CHECK-LABEL: @main -// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]], -// CHECK: call {{.*}} [[S_FLOAT_TY_COPY_ASSIGN:@.+]]([[S_FLOAT_TY]]* [[TEST]], [[S_FLOAT_TY]]* -// CHECK: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[MAIN_MICROTASK:@.+]] to void (i32*, i32*, ...)*)) -// CHECK: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[MAIN_MICROTASK1:@.+]] to void (i32*, i32*, ...)*)) -// CHECK: = call {{.*}}i{{.+}} [[TMAIN_INT:@.+]]() -// CHECK: call {{.*}} [[S_FLOAT_TY_DESTR:@.+]]([[S_FLOAT_TY]]* -// CHECK: ret +// ALL-LABEL: @main +// ALL: [[TEST:%.+]] = alloca [[S_FLOAT_TY]], +// ALL: call {{.*}} [[S_FLOAT_TY_COPY_ASSIGN:@.+]]([[S_FLOAT_TY]]* [[TEST]], [[S_FLOAT_TY]]* +// ALL: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[MAIN_MICROTASK:@.+]] to void (i32*, i32*, ...)*)) +// ALL: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[MAIN_MICROTASK1:@.+]] to void (i32*, i32*, ...)*)) +// ALL: = call {{.*}}i{{.+}} [[TMAIN_INT:@.+]]() +// ALL: call {{.*}} [[S_FLOAT_TY_DESTR:@.+]]([[S_FLOAT_TY]]* +// ALL: ret // TLS-CHECK-LABEL: @main // TLS-CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]], @@ -248,22 +252,56 @@ // TLS-CHECK: call {{.*}} [[S_FLOAT_TY_DESTR:@.+]]([[S_FLOAT_TY]]* // TLS-CHECK: ret -// CHECK: define internal {{.*}}void [[MAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}) +// IRBUILDER: define internal {{.*}}void [[MAIN_MICROTASK1]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}) +// IRBUILDER: [[GTID_LOCAL:%.+]] = alloca i32 +// IRBUILDER: [[GTID_LD:%.+]] = load i32, i32* [[GTID_ADDR]] +// IRBUILDER: store i32 [[GTID_LD]], i32* [[GTID_LOCAL]] +// IRBUILDER: [[GTID:%.+]] = load i32, i32* [[GTID_LOCAL]] + +// threadprivate_t_var = t_var; +// IRBUILDER: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[T_VAR]] +// IRBUILDER: ptrtoint i{{[0-9]+}}* %{{.+}} to i{{[0-9]+}} +// IRBUILDER: icmp ne i{{[0-9]+}} ptrtoint (i{{[0-9]+}}* [[T_VAR]] to i{{[0-9]+}}), %{{.+}} +// IRBUILDER: br i1 %{{.+}}, label %[[NOT_MASTER:.+]], label %[[DONE:.+]] + +// IRBUILDER: ret void + +// IRBUILDER: [[DONE]] +// IRBUILDER: [[GTID_CALL:%.+]] = call i32 @__kmpc_global_thread_num(%{{.+}}* @{{.+}}) +// IRBUILDER: call {{.*}}void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID_CALL]]) +// IRBUILDER: add nsw i32 %{{.+}}, 1 + +// IRBUILDER: [[NOT_MASTER]] +// IRBUILDER: load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR]], +// IRBUILDER: store i{{[0-9]+}} %{{.+}}, i{{[0-9]+}}* %{{.+}}, + +// ALL: define internal {{.*}}void [[MAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}) // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]], // CHECK: [[GTID_ADDR:%.+]] = load i32*, i32** [[GTID_ADDR_ADDR]], // CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_ADDR]], +// IRBUILDER: [[GTID_LOCAL:%.+]] = alloca i32 +// IRBUILDER: [[GTID_LD:%.+]] = load i32, i32* [[GTID_ADDR]] +// IRBUILDER: store i32 [[GTID_LD]], i32* [[GTID_LOCAL]] +// IRBUILDER: [[GTID:%.+]] = load i32, i32* [[GTID_LOCAL]] + // TLS-CHECK: define internal {{.*}}void [[MAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, // TLS-CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]], // threadprivate_t_var = t_var; -// CHECK: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[T_VAR]] -// CHECK: ptrtoint i{{[0-9]+}}* %{{.+}} to i{{[0-9]+}} -// CHECK: icmp ne i{{[0-9]+}} ptrtoint (i{{[0-9]+}}* [[T_VAR]] to i{{[0-9]+}}), %{{.+}} -// CHECK: br i1 %{{.+}}, label %[[NOT_MASTER:.+]], label %[[DONE:.+]] -// CHECK: [[NOT_MASTER]] -// CHECK: load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR]], -// CHECK: store i{{[0-9]+}} %{{.+}}, i{{[0-9]+}}* %{{.+}}, +// ALL: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[T_VAR]] +// ALL: ptrtoint i{{[0-9]+}}* %{{.+}} to i{{[0-9]+}} +// ALL: icmp ne i{{[0-9]+}} ptrtoint (i{{[0-9]+}}* [[T_VAR]] to i{{[0-9]+}}), %{{.+}} +// ALL: br i1 %{{.+}}, label %[[NOT_MASTER:.+]], label %[[DONE:.+]] + +// IRBUILDER: ret void +// IRBUILDER: [[DONE]] +// IRBUILDER: [[GTID_CALL:%.+]] = call i32 @__kmpc_global_thread_num(%{{.+}}* @{{.+}}) +// IRBUILDER: call {{.*}}void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID_CALL]]) + +// ALL: [[NOT_MASTER]] +// ALL: load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR]], +// ALL: store i{{[0-9]+}} %{{.+}}, i{{[0-9]+}}* %{{.+}}, // TLS-CHECK: [[MASTER_REF:%.+]] = load i32*, i32** % // TLS-CHECK: [[MASTER_REF2:%.+]] = load [2 x i32]*, [2 x i32]** % @@ -278,21 +316,21 @@ // TLS-CHECK: store i32 [[MASTER_VAL]], i32* [[T_VAR]] // threadprivate_vec = vec; -// CHECK: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[VEC]] -// CHECK: call void @llvm.memcpy{{.*}}(i8* align {{[0-9]+}} %{{.+}}, i8* align {{[0-9]+}} bitcast ([2 x i{{[0-9]+}}]* [[VEC]] to i8*), +// ALL: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[VEC]] +// ALL: call void @llvm.memcpy{{.*}}(i8* align {{[0-9]+}} %{{.+}}, i8* align {{[0-9]+}} bitcast ([2 x i{{[0-9]+}}]* [[VEC]] to i8*), // TLS-CHECK: [[MASTER_CAST:%.+]] = bitcast [2 x i32]* [[MASTER_REF2]] to i8* // TLS-CHECK: call void @llvm.memcpy{{.*}}(i8* align {{[0-9]+}} bitcast ([2 x i{{[0-9]+}}]* [[VEC]] to i8*), i8* align {{[0-9]+}} [[MASTER_CAST]] // threadprivate_s_arr = s_arr; -// CHECK: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[S_ARR]] -// CHECK: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* {{%.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: [[S_ARR_PRIV_END:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_PRIV_BEGIN]], i{{[0-9]+}} 2 -// CHECK: [[IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_PRIV_BEGIN]], [[S_ARR_PRIV_END]] -// CHECK: br i1 [[IS_EMPTY]], label %[[S_ARR_BODY_DONE:.+]], label %[[S_ARR_BODY:.+]] -// CHECK: [[S_ARR_BODY]] -// CHECK: call {{.*}} [[S_FLOAT_TY_COPY_ASSIGN]]([[S_FLOAT_TY]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}}) -// CHECK: br i1 {{.+}}, label %{{.+}}, label %[[S_ARR_BODY]] +// ALL: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[S_ARR]] +// ALL: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* {{%.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// ALL: [[S_ARR_PRIV_END:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_PRIV_BEGIN]], i{{[0-9]+}} 2 +// ALL: [[IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_PRIV_BEGIN]], [[S_ARR_PRIV_END]] +// ALL: br i1 [[IS_EMPTY]], label %[[S_ARR_BODY_DONE:.+]], label %[[S_ARR_BODY:.+]] +// ALL: [[S_ARR_BODY]] +// ALL: call {{.*}} [[S_FLOAT_TY_COPY_ASSIGN]]([[S_FLOAT_TY]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}}) +// ALL: br i1 {{.+}}, label %{{.+}}, label %[[S_ARR_BODY]] // TLS-CHECK: [[MASTER_CAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[MASTER_REF3]] to [[S_FLOAT_TY]]* // TLS-CHECK-DAG: [[S_ARR_SRC_BEGIN:%.+]] = phi [[S_FLOAT_TY]]* {{.*}}[[MASTER_CAST]] @@ -305,8 +343,8 @@ // TLS-CHECK: [[ARR_DONE]] // threadprivate_var = var; -// CHECK: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[VAR]] -// CHECK: call {{.*}} [[S_FLOAT_TY_COPY_ASSIGN]]([[S_FLOAT_TY]]* {{%.+}}, [[S_FLOAT_TY]]* {{.*}}[[VAR]]) +// ALL: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[VAR]] +// ALL: call {{.*}} [[S_FLOAT_TY_COPY_ASSIGN]]([[S_FLOAT_TY]]* {{%.+}}, [[S_FLOAT_TY]]* {{.*}}[[VAR]]) // CHECK: [[DONE]] // TLS-CHECK: call {{.*}} [[S_FLOAT_TY_COPY_ASSIGN]]([[S_FLOAT_TY]]* {{.*}}[[VAR]], [[S_FLOAT_TY]]* {{.*}}[[MASTER_REF4]]) @@ -356,36 +394,67 @@ // TLS-CHECK: call {{.*}}void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]]) // TLS-CHECK: ret void -// CHECK: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]() -// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]], -// CHECK: call {{.*}} [[S_INT_TY_COPY_ASSIGN:@.+]]([[S_INT_TY]]* [[TEST]], [[S_INT_TY]]* -// CHECK: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[TMAIN_MICROTASK:@.+]] to void (i32*, i32*, ...)*)) -// CHECK: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[TMAIN_MICROTASK1:@.+]] to void (i32*, i32*, ...)*)) -// CHECK: call {{.*}} [[S_INT_TY_DESTR:@.+]]([[S_INT_TY]]* -// CHECK: ret +// ALL: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]() +// ALL: [[TEST:%.+]] = alloca [[S_INT_TY]], +// ALL: call {{.*}} [[S_INT_TY_COPY_ASSIGN:@.+]]([[S_INT_TY]]* [[TEST]], [[S_INT_TY]]* +// ALL: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[TMAIN_MICROTASK:@.+]] to void (i32*, i32*, ...)*)) +// ALL: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[TMAIN_MICROTASK1:@.+]] to void (i32*, i32*, ...)*)) +// ALL: call {{.*}} [[S_INT_TY_DESTR:@.+]]([[S_INT_TY]]* +// ALL: ret // TLS-CHECK: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]() // TLS-CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]], // TLS-CHECK: call {{.*}} [[S_INT_TY_COPY_ASSIGN:@.+]]([[S_INT_TY]]* [[TEST]], [[S_INT_TY]]* // TLS-CHECK: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i32*, [2 x i32]*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]*)* [[TMAIN_MICROTASK:@.+]] to void (i32*, i32*, ...)*), // TLS-CHECK: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i32*)* [[TMAIN_MICROTASK1:@.+]] to void (i32*, i32*, ...)*), + +// IRBUILDER: define internal {{.*}}void [[TMAIN_MICROTASK1]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}) +// IRBUILDER: [[GTID_LOCAL:%.+]] = alloca i32 +// IRBUILDER: [[GTID_LD:%.+]] = load i32, i32* [[GTID_ADDR]] +// IRBUILDER: store i32 [[GTID_LD]], i32* [[GTID_LOCAL]] +// IRBUILDER: [[GTID:%.+]] = load i32, i32* [[GTID_LOCAL]] +// threadprivate_t_var = t_var; +// IRBUILDER: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[TMAIN_T_VAR]] +// IRBUILDER: ptrtoint i{{[0-9]+}}* %{{.+}} to i{{[0-9]+}} +// IRBUILDER: icmp ne i{{[0-9]+}} ptrtoint (i{{[0-9]+}}* [[TMAIN_T_VAR]] to i{{[0-9]+}}), %{{.+}} +// IRBUILDER: br i1 %{{.+}}, label %[[NOT_MASTER:.+]], label %[[DONE:.+]] +// IRBUILDER: ret void +// IRBUILDER: [[DONE]] +// IRBUILDER: [[GTID_CALL:%.+]] = call i32 @__kmpc_global_thread_num(%{{.+}}* @{{.+}}) +// IRBUILDER: call {{.*}}void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID_CALL]]) +// IRBUILDER: [[NOT_MASTER]] +// IRBUILDER: load i{{[0-9]+}}, i{{[0-9]+}}* [[TMAIN_T_VAR]], +// IRBUILDER: store i{{[0-9]+}} %{{.+}}, i{{[0-9]+}}* %{{.+}}, + // -// CHECK: define internal {{.*}}void [[TMAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}) +// ALL: define internal {{.*}}void [[TMAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}) // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]], // CHECK: [[GTID_ADDR:%.+]] = load i32*, i32** [[GTID_ADDR_ADDR]], // CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_ADDR]], + +// IRBUILDER: [[GTID_LOCAL:%.+]] = alloca i32 +// IRBUILDER: [[GTID_LD:%.+]] = load i32, i32* [[GTID_ADDR]] +// IRBUILDER: store i32 [[GTID_LD]], i32* [[GTID_LOCAL]] +// IRBUILDER: [[GTID:%.+]] = load i32, i32* [[GTID_LOCAL]] // // TLS-CHECK: define internal {{.*}}void [[TMAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}) // TLS-CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]], // threadprivate_t_var = t_var; -// CHECK: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[TMAIN_T_VAR]] -// CHECK: ptrtoint i{{[0-9]+}}* %{{.+}} to i{{[0-9]+}} -// CHECK: icmp ne i{{[0-9]+}} ptrtoint (i{{[0-9]+}}* [[TMAIN_T_VAR]] to i{{[0-9]+}}), %{{.+}} -// CHECK: br i1 %{{.+}}, label %[[NOT_MASTER:.+]], label %[[DONE:.+]] -// CHECK: [[NOT_MASTER]] -// CHECK: load i{{[0-9]+}}, i{{[0-9]+}}* [[TMAIN_T_VAR]], align 128 -// CHECK: store i{{[0-9]+}} %{{.+}}, i{{[0-9]+}}* %{{.+}}, align 128 +// ALL: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[TMAIN_T_VAR]] +// ALL: ptrtoint i{{[0-9]+}}* %{{.+}} to i{{[0-9]+}} +// ALL: icmp ne i{{[0-9]+}} ptrtoint (i{{[0-9]+}}* [[TMAIN_T_VAR]] to i{{[0-9]+}}), %{{.+}} +// ALL: br i1 %{{.+}}, label %[[NOT_MASTER:.+]], label %[[DONE:.+]] + +// IRBUILDER: ret void + +// IRBUILDER: [[DONE]] +// IRBUILDER: [[GTID_CALL:%.+]] = call i32 @__kmpc_global_thread_num(%{{.+}}* @{{.+}}) +// IRBUILDER: call {{.*}}void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID_CALL]]) + +// ALL: [[NOT_MASTER]] +// ALL: load i{{[0-9]+}}, i{{[0-9]+}}* [[TMAIN_T_VAR]], align 128 +// ALL: store i{{[0-9]+}} %{{.+}}, i{{[0-9]+}}* %{{.+}}, align 128 // TLS-CHECK: [[MASTER_REF:%.+]] = load i32*, i32** % // TLS-CHECK: [[MASTER_REF1:%.+]] = load [2 x i32]*, [2 x i32]** % @@ -400,21 +469,21 @@ // TLS-CHECK: store i32 [[MASTER_VAL]], i32* [[TMAIN_T_VAR]], align 128 // threadprivate_vec = vec; -// CHECK: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[TMAIN_VEC]] -// CHECK: call {{.*}}void @llvm.memcpy{{.*}}(i8* align {{[0-9]+}} %{{.+}}, i8* align {{[0-9]+}} bitcast ([2 x i{{[0-9]+}}]* [[TMAIN_VEC]] to i8*), +// ALL: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[TMAIN_VEC]] +// ALL: call {{.*}}void @llvm.memcpy{{.*}}(i8* align {{[0-9]+}} %{{.+}}, i8* align {{[0-9]+}} bitcast ([2 x i{{[0-9]+}}]* [[TMAIN_VEC]] to i8*), // TLS-CHECK: [[MASTER_CAST:%.+]] = bitcast [2 x i32]* [[MASTER_REF1]] to i8* // TLS-CHECK: call void @llvm.memcpy{{.*}}(i8* align {{[0-9]+}} bitcast ([2 x i{{[0-9]+}}]* [[TMAIN_VEC]] to i8*), i8* align {{[0-9]+}} [[MASTER_CAST]] // threadprivate_s_arr = s_arr; -// CHECK: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[TMAIN_S_ARR]] -// CHECK: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* {{%.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: [[S_ARR_PRIV_END:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_PRIV_BEGIN]], i{{[0-9]+}} 2 -// CHECK: [[IS_EMPTY:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_PRIV_BEGIN]], [[S_ARR_PRIV_END]] -// CHECK: br i1 [[IS_EMPTY]], label %[[S_ARR_BODY_DONE:.+]], label %[[S_ARR_BODY:.+]] -// CHECK: [[S_ARR_BODY]] -// CHECK: call {{.*}} [[S_INT_TY_COPY_ASSIGN]]([[S_INT_TY]]* {{.+}}, [[S_INT_TY]]* {{.+}}) -// CHECK: br i1 {{.+}}, label %{{.+}}, label %[[S_ARR_BODY]] +// ALL: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[TMAIN_S_ARR]] +// ALL: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* {{%.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// ALL: [[S_ARR_PRIV_END:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_PRIV_BEGIN]], i{{[0-9]+}} 2 +// ALL: [[IS_EMPTY:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_PRIV_BEGIN]], [[S_ARR_PRIV_END]] +// ALL: br i1 [[IS_EMPTY]], label %[[S_ARR_BODY_DONE:.+]], label %[[S_ARR_BODY:.+]] +// ALL: [[S_ARR_BODY]] +// ALL: call {{.*}} [[S_INT_TY_COPY_ASSIGN]]([[S_INT_TY]]* {{.+}}, [[S_INT_TY]]* {{.+}}) +// ALL: br i1 {{.+}}, label %{{.+}}, label %[[S_ARR_BODY]] // TLS-CHECK: [[MASTER_CAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[MASTER_REF2]] to [[S_INT_TY]]* // TLS-CHECK-DAG: [[S_ARR_SRC_BEGIN:%.+]] = phi [[S_INT_TY]]* {{.*}}[[MASTER_CAST]] @@ -427,8 +496,8 @@ // TLS-CHECK: [[ARR_DONE]] // threadprivate_var = var; -// CHECK: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[TMAIN_VAR]] -// CHECK: call {{.*}} [[S_INT_TY_COPY_ASSIGN]]([[S_INT_TY]]* {{%.+}}, [[S_INT_TY]]* {{.*}}[[TMAIN_VAR]]) +// ALL: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[TMAIN_VAR]] +// ALL: call {{.*}} [[S_INT_TY_COPY_ASSIGN]]([[S_INT_TY]]* {{%.+}}, [[S_INT_TY]]* {{.*}}[[TMAIN_VAR]]) // CHECK: [[DONE]] // TLS-CHECK: call {{.*}} [[S_INT_TY_COPY_ASSIGN]]([[S_INT_TY]]* {{.*}}[[TMAIN_VAR]], [[S_INT_TY]]* {{.*}}[[MASTER_REF3]])