diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -5312,6 +5312,74 @@
 }
 
 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
+  if (CGM.getLangOpts().OpenMPIRBuilder) {
+    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
+    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+
+    if (S.hasClausesOfKind<OMPDependClause>()) {
+      // The ordered directive with depend clause.
+      assert(!S.hasAssociatedStmt() &&
+             "No associated statement must be in ordered depend construct.");
+      InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
+                             AllocaInsertPt->getIterator());
+      for (const auto *DC : S.getClausesOfKind<OMPDependClause>()) {
+        unsigned NumLoops = DC->getNumLoops();
+        QualType Int64Ty = CGM.getContext().getIntTypeForBitwidth(
+            /*DestWidth=*/64, /*Signed=*/1);
+        llvm::SmallVector<llvm::Value *> StoreValues;
+        for (unsigned I = 0; I < NumLoops; I++) {
+          const Expr *CounterVal = DC->getLoopData(I);
+          assert(CounterVal);
+          llvm::Value *StoreValue = EmitScalarConversion(
+              EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
+              CounterVal->getExprLoc());
+          StoreValues.emplace_back(StoreValue);
+        }
+        bool IsDependSource = false;
+        if (DC->getDependencyKind() == OMPC_DEPEND_source)
+          IsDependSource = true;
+        Builder.restoreIP(OMPBuilder.createOrderedDepend(
+            Builder, AllocaIP, NumLoops, StoreValues, ".cnt.addr",
+            IsDependSource));
+      }
+    } else {
+      // The ordered directive with threads or simd clause, or without clause.
+      // Without clause, it behaves as if the threads clause is specified.
+      const auto *C = S.getSingleClause<OMPSIMDClause>();
+
+      auto FiniCB = [this](InsertPointTy IP) {
+        OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
+      };
+
+      auto BodyGenCB = [&S, C, this](InsertPointTy AllocaIP,
+                                     InsertPointTy CodeGenIP,
+                                     llvm::BasicBlock &FiniBB) {
+        const CapturedStmt *CS = S.getInnermostCapturedStmt();
+        if (C) {
+          llvm::SmallVector<llvm::Value *, 16> CapturedVars;
+          GenerateOpenMPCapturedVars(*CS, CapturedVars);
+          llvm::Function *OutlinedFn =
+              emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
+          assert(S.getBeginLoc().isValid() &&
+                 "Outlined function call location must be valid.");
+          ApplyDebugLocation::CreateDefaultArtificial(*this, S.getBeginLoc());
+          OMPBuilderCBHelpers::EmitCaptureStmt(*this, CodeGenIP, FiniBB,
+                                               OutlinedFn, CapturedVars);
+        } else {
+          OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP,
+                                                         FiniBB);
+          OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CS->getCapturedStmt(),
+                                                 CodeGenIP, FiniBB);
+        }
+      };
+
+      OMPLexicalScope Scope(*this, S, OMPD_unknown);
+      Builder.restoreIP(
+          OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, !C));
+    }
+    return;
+  }
+
   if (S.hasClausesOfKind<OMPDependClause>()) {
     assert(!S.hasAssociatedStmt() &&
            "No associated statement must be in ordered depend construct.");
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -1775,6 +1775,24 @@
       CGF.Builder.CreateBr(&FiniBB);
     }
 
+    static void EmitCaptureStmt(CodeGenFunction &CGF, InsertPointTy CodeGenIP,
+                                llvm::BasicBlock &FiniBB, llvm::Function *Fn,
+                                ArrayRef<llvm::Value *> Args) {
+      llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
+      if (llvm::Instruction *CodeGenIPBBTI = CodeGenIPBB->getTerminator())
+        CodeGenIPBBTI->eraseFromParent();
+
+      CGF.Builder.SetInsertPoint(CodeGenIPBB);
+
+      if (Fn->doesNotThrow())
+        CGF.EmitNounwindRuntimeCall(Fn, Args);
+      else
+        CGF.EmitRuntimeCall(Fn, Args);
+
+      if (CGF.Builder.saveIP().isSet())
+        CGF.Builder.CreateBr(&FiniBB);
+    }
+
     /// RAII for preserving necessary info during Outlined region body codegen.
     class OutlinedRegionBodyRAII {
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -5320,8 +5320,9 @@
 
   if (Rel == BO_LE || Rel == BO_GE) {
     // Add one to the range if the relational operator is inclusive.
-    Range =
-        AssertSuccess(Actions.BuildUnaryOp(nullptr, {}, UO_PreInc, Range));
+    Range = AssertSuccess(Actions.BuildBinOp(
+        nullptr, {}, BO_Add, Range,
+        Actions.ActOnIntegerConstant(SourceLocation(), 1).get()));
   }
 
   // Divide by the absolute step amount.
diff --git a/clang/test/OpenMP/ordered_codegen.cpp b/clang/test/OpenMP/ordered_codegen.cpp
--- a/clang/test/OpenMP/ordered_codegen.cpp
+++ b/clang/test/OpenMP/ordered_codegen.cpp
@@ -1,11 +1,19 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefix=CHECK1
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=CHECK1,CHECK1-NORMAL
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK2
+// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK2,CHECK2-NORMAL
 
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -fopenmp-version=45 -o - | FileCheck %s --check-prefix=CHECK3
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=CHECK1,CHECK1-IRBUILDER
+// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK2,CHECK2-IRBUILDER
+
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -fopenmp-version=45 -o - | FileCheck %s --check-prefixes=CHECK3,CHECK3-NORMAL
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -fopenmp-version=45 -o %t %s
-// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK4
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK4,CHECK4-NORMAL
+
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -fopenmp-version=45 -o - | FileCheck %s --check-prefixes=CHECK3,CHECK3-IRBUILDER
+// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -fopenmp-version=45 -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -fopenmp-version=45 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK4,CHECK4-IRBUILDER
 
 // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefix=CHECK5
 // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
@@ -128,7 +136,7 @@
 // CHECK1-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+// CHECK1-NORMAL-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
 // CHECK1-NEXT:    store float* [[A]], float** [[A_ADDR]], align 8
 // CHECK1-NEXT:    store float* [[B]], float** [[B_ADDR]], align 8
 // CHECK1-NEXT:    store float* [[C]], float** [[C_ADDR]], align 8
@@ -137,9 +145,12 @@
 // CHECK1-NEXT:    store i32 4571423, i32* [[DOTOMP_UB]], align 4
 // CHECK1-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
 // CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT:    call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]])
+// CHECK1-IRBUILDER-NEXT:    call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
+// CHECK1-NORMAL-NEXT:    call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
 // CHECK1-NEXT:    br label [[OMP_DISPATCH_COND:%.*]]
 // CHECK1:       omp.dispatch.cond:
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]])
 // CHECK1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
 // CHECK1-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
 // CHECK1-NEXT:    br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -157,6 +168,7 @@
 // CHECK1-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP5]], 7
 // CHECK1-NEXT:    [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]]
 // CHECK1-NEXT:    store i32 [[SUB]], i32* [[I]], align 4
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK1-NEXT:    call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK1-NEXT:    [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8
 // CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[I]], align 4
@@ -188,6 +200,7 @@
 // CHECK1-NEXT:    [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
 // CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP17]], 1
 // CHECK1-NEXT:    store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]])
 // CHECK1-NEXT:    call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK1:       omp.inner.for.end:
@@ -195,7 +208,9 @@
 // CHECK1:       omp.dispatch.inc:
 // CHECK1-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK1:       omp.dispatch.end:
-// CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-IRBUILDER-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP0]])
+// CHECK1-NORMAL-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -213,7 +228,7 @@
 // CHECK1-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[I:%.*]] = alloca i64, align 8
-// CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-NORMAL-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK1-NEXT:    store float* [[A]], float** [[A_ADDR]], align 8
 // CHECK1-NEXT:    store float* [[B]], float** [[B_ADDR]], align 8
 // CHECK1-NEXT:    store float* [[C]], float** [[C_ADDR]], align 8
@@ -222,9 +237,11 @@
 // CHECK1-NEXT:    store i64 16908287, i64* [[DOTOMP_UB]], align 8
 // CHECK1-NEXT:    store i64 1, i64* [[DOTOMP_STRIDE]], align 8
 // CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]])
 // CHECK1-NEXT:    call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1073741891, i64 0, i64 16908287, i64 1, i64 1)
 // CHECK1-NEXT:    br label [[OMP_DISPATCH_COND:%.*]]
 // CHECK1:       omp.dispatch.cond:
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]])
 // CHECK1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]])
 // CHECK1-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
 // CHECK1-NEXT:    br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -243,6 +260,7 @@
 // CHECK1-NEXT:    [[MUL:%.*]] = mul i64 [[TMP5]], 127
 // CHECK1-NEXT:    [[ADD1:%.*]] = add i64 131071, [[MUL]]
 // CHECK1-NEXT:    store i64 [[ADD1]], i64* [[I]], align 8
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK1-NEXT:    call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK1-NEXT:    [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8
 // CHECK1-NEXT:    [[TMP7:%.*]] = load i64, i64* [[I]], align 8
@@ -270,6 +288,7 @@
 // CHECK1-NEXT:    [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
 // CHECK1-NEXT:    [[ADD7:%.*]] = add i64 [[TMP17]], 1
 // CHECK1-NEXT:    store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]])
 // CHECK1-NEXT:    call void @__kmpc_dispatch_fini_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK1:       omp.inner.for.end:
@@ -277,7 +296,9 @@
 // CHECK1:       omp.dispatch.inc:
 // CHECK1-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK1:       omp.dispatch.end:
-// CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-IRBUILDER-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK1-NORMAL-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -303,7 +324,7 @@
 // CHECK1-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[I8:%.*]] = alloca i8, align 1
 // CHECK1-NEXT:    [[X9:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-NORMAL-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK1-NEXT:    store float* [[A]], float** [[A_ADDR]], align 8
 // CHECK1-NEXT:    store float* [[B]], float** [[B_ADDR]], align 8
 // CHECK1-NEXT:    store float* [[C]], float** [[C_ADDR]], align 8
@@ -336,9 +357,11 @@
 // CHECK1-NEXT:    store i64 1, i64* [[DOTOMP_STRIDE]], align 8
 // CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]])
 // CHECK1-NEXT:    call void @__kmpc_dispatch_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1073741894, i64 0, i64 [[TMP6]], i64 1, i64 1)
 // CHECK1-NEXT:    br label [[OMP_DISPATCH_COND:%.*]]
 // CHECK1:       omp.dispatch.cond:
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]])
 // CHECK1-NEXT:    [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]])
 // CHECK1-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK1-NEXT:    br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -369,6 +392,7 @@
 // CHECK1-NEXT:    [[SUB20:%.*]] = sub nsw i64 11, [[MUL19]]
 // CHECK1-NEXT:    [[CONV21:%.*]] = trunc i64 [[SUB20]] to i32
 // CHECK1-NEXT:    store i32 [[CONV21]], i32* [[X9]], align 4
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK1-NEXT:    call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK1-NEXT:    [[TMP15:%.*]] = load float*, float** [[B_ADDR]], align 8
 // CHECK1-NEXT:    [[TMP16:%.*]] = load i8, i8* [[I8]], align 1
@@ -400,6 +424,7 @@
 // CHECK1-NEXT:    [[TMP26:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
 // CHECK1-NEXT:    [[ADD30:%.*]] = add nsw i64 [[TMP26]], 1
 // CHECK1-NEXT:    store i64 [[ADD30]], i64* [[DOTOMP_IV]], align 8
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]])
 // CHECK1-NEXT:    call void @__kmpc_dispatch_fini_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK1:       omp.inner.for.end:
@@ -409,7 +434,9 @@
 // CHECK1:       omp.dispatch.end:
 // CHECK1-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK1:       omp.precond.end:
-// CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-IRBUILDER-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK1-NORMAL-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -430,7 +457,7 @@
 // CHECK1-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[I:%.*]] = alloca i8, align 1
 // CHECK1-NEXT:    [[X2:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-NORMAL-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK1-NEXT:    store float* [[A]], float** [[A_ADDR]], align 8
 // CHECK1-NEXT:    store float* [[B]], float** [[B_ADDR]], align 8
 // CHECK1-NEXT:    store float* [[C]], float** [[C_ADDR]], align 8
@@ -440,9 +467,11 @@
 // CHECK1-NEXT:    store i32 199, i32* [[DOTOMP_UB]], align 4
 // CHECK1-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
 // CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]])
 // CHECK1-NEXT:    call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1073741893, i32 0, i32 199, i32 1, i32 1)
 // CHECK1-NEXT:    br label [[OMP_DISPATCH_COND:%.*]]
 // CHECK1:       omp.dispatch.cond:
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
 // CHECK1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
 // CHECK1-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
 // CHECK1-NEXT:    br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -470,6 +499,7 @@
 // CHECK1-NEXT:    [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1
 // CHECK1-NEXT:    [[ADD6:%.*]] = add nsw i32 -10, [[MUL5]]
 // CHECK1-NEXT:    store i32 [[ADD6]], i32* [[X2]], align 4
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK1-NEXT:    call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK1-NEXT:    [[TMP8:%.*]] = load float*, float** [[B_ADDR]], align 8
 // CHECK1-NEXT:    [[TMP9:%.*]] = load i8, i8* [[I]], align 1
@@ -501,6 +531,7 @@
 // CHECK1-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
 // CHECK1-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP19]], 1
 // CHECK1-NEXT:    store i32 [[ADD15]], i32* [[DOTOMP_IV]], align 4
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
 // CHECK1-NEXT:    call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK1:       omp.inner.for.end:
@@ -508,7 +539,9 @@
 // CHECK1:       omp.dispatch.inc:
 // CHECK1-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK1:       omp.dispatch.end:
-// CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-IRBUILDER-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK1-NORMAL-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -535,7 +568,7 @@
 // CHECK1-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[I28:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-NORMAL-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK1-NEXT:    store i32 [[LOW]], i32* [[LOW_ADDR]], align 4
 // CHECK1-NEXT:    store i32 [[UP]], i32* [[UP_ADDR]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[LOW_ADDR]], align 4
@@ -622,9 +655,11 @@
 // CHECK1-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
 // CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_20]], align 4
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12:[0-9]+]])
 // CHECK1-NEXT:    call void @__kmpc_dispatch_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 [[TMP25]], i32 1, i32 1)
 // CHECK1-NEXT:    br label [[OMP_DISPATCH_COND:%.*]]
 // CHECK1:       omp.dispatch.cond:
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]])
 // CHECK1-NEXT:    [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
 // CHECK1-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0
 // CHECK1-NEXT:    br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -632,13 +667,15 @@
 // CHECK1-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
 // CHECK1-NEXT:    store i32 [[TMP27]], i32* [[DOTOMP_IV16]], align 4
 // CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND29:%.*]]
-// CHECK1:       omp.inner.for.cond29:
+// CHECK1-IRBUILDER:       omp.inner.for.cond30:
+// CHECK1-NORMAL:       omp.inner.for.cond29:
 // CHECK1-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
 // CHECK1-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7
 // CHECK1-NEXT:    [[ADD30:%.*]] = add i32 [[TMP29]], 1
 // CHECK1-NEXT:    [[CMP31:%.*]] = icmp ult i32 [[TMP28]], [[ADD30]]
 // CHECK1-NEXT:    br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END40:%.*]]
-// CHECK1:       omp.inner.for.body32:
+// CHECK1-IRBUILDER:       omp.inner.for.body33:
+// CHECK1-NORMAL:       omp.inner.for.body32:
 // CHECK1-NEXT:    [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group !7
 // CHECK1-NEXT:    [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
 // CHECK1-NEXT:    [[MUL33:%.*]] = mul i32 [[TMP31]], 1
@@ -650,15 +687,19 @@
 // CHECK1-NEXT:    store float 0.000000e+00, float* [[ARRAYIDX36]], align 4, !llvm.access.group !7
 // CHECK1-NEXT:    call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7
 // CHECK1-NEXT:    br label [[OMP_BODY_CONTINUE37:%.*]]
-// CHECK1:       omp.body.continue37:
+// CHECK1-IRBUILDER:       omp.body.continue38:
+// CHECK1-NORMAL:       omp.body.continue37:
 // CHECK1-NEXT:    br label [[OMP_INNER_FOR_INC38:%.*]]
-// CHECK1:       omp.inner.for.inc38:
+// CHECK1-IRBUILDER:       omp.inner.for.inc39:
+// CHECK1-NORMAL:       omp.inner.for.inc38:
 // CHECK1-NEXT:    [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
 // CHECK1-NEXT:    [[ADD39:%.*]] = add i32 [[TMP33]], 1
 // CHECK1-NEXT:    store i32 [[ADD39]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]])
 // CHECK1-NEXT:    call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]), !llvm.access.group !7
 // CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP8:![0-9]+]]
-// CHECK1:       omp.inner.for.end40:
+// CHECK1-IRBUILDER:       omp.inner.for.end42:
+// CHECK1-NORMAL:       omp.inner.for.end40:
 // CHECK1-NEXT:    br label [[OMP_DISPATCH_INC:%.*]]
 // CHECK1:       omp.dispatch.inc:
 // CHECK1-NEXT:    br label [[OMP_DISPATCH_COND]]
@@ -681,7 +722,9 @@
 // CHECK1:       .omp.final.done:
 // CHECK1-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK1:       omp.precond.end:
-// CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-IRBUILDER-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK1-NORMAL-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -725,7 +768,7 @@
 // CHECK2-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
-// CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+// CHECK2-NORMAL-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
 // CHECK2-NEXT:    store float* [[A]], float** [[A_ADDR]], align 8
 // CHECK2-NEXT:    store float* [[B]], float** [[B_ADDR]], align 8
 // CHECK2-NEXT:    store float* [[C]], float** [[C_ADDR]], align 8
@@ -734,9 +777,12 @@
 // CHECK2-NEXT:    store i32 4571423, i32* [[DOTOMP_UB]], align 4
 // CHECK2-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
 // CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
-// CHECK2-NEXT:    call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]])
+// CHECK2-IRBUILDER-NEXT:    call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
+// CHECK2-NORMAL-NEXT:    call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
 // CHECK2-NEXT:    br label [[OMP_DISPATCH_COND:%.*]]
 // CHECK2:       omp.dispatch.cond:
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]])
 // CHECK2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
 // CHECK2-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
 // CHECK2-NEXT:    br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -754,6 +800,7 @@
 // CHECK2-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP5]], 7
 // CHECK2-NEXT:    [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]]
 // CHECK2-NEXT:    store i32 [[SUB]], i32* [[I]], align 4
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK2-NEXT:    call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK2-NEXT:    [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8
 // CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[I]], align 4
@@ -785,6 +832,7 @@
 // CHECK2-NEXT:    [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
 // CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP17]], 1
 // CHECK2-NEXT:    store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]])
 // CHECK2-NEXT:    call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK2:       omp.inner.for.end:
@@ -792,7 +840,9 @@
 // CHECK2:       omp.dispatch.inc:
 // CHECK2-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK2:       omp.dispatch.end:
-// CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK2-IRBUILDER-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP0]])
+// CHECK2-NORMAL-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
 // CHECK2-NEXT:    ret void
 //
 //
@@ -810,7 +860,7 @@
 // CHECK2-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
 // CHECK2-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[I:%.*]] = alloca i64, align 8
-// CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK2-NORMAL-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK2-NEXT:    store float* [[A]], float** [[A_ADDR]], align 8
 // CHECK2-NEXT:    store float* [[B]], float** [[B_ADDR]], align 8
 // CHECK2-NEXT:    store float* [[C]], float** [[C_ADDR]], align 8
@@ -819,9 +869,11 @@
 // CHECK2-NEXT:    store i64 16908287, i64* [[DOTOMP_UB]], align 8
 // CHECK2-NEXT:    store i64 1, i64* [[DOTOMP_STRIDE]], align 8
 // CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]])
 // CHECK2-NEXT:    call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1073741891, i64 0, i64 16908287, i64 1, i64 1)
 // CHECK2-NEXT:    br label [[OMP_DISPATCH_COND:%.*]]
 // CHECK2:       omp.dispatch.cond:
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]])
 // CHECK2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]])
 // CHECK2-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
 // CHECK2-NEXT:    br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -840,6 +892,7 @@
 // CHECK2-NEXT:    [[MUL:%.*]] = mul i64 [[TMP5]], 127
 // CHECK2-NEXT:    [[ADD1:%.*]] = add i64 131071, [[MUL]]
 // CHECK2-NEXT:    store i64 [[ADD1]], i64* [[I]], align 8
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK2-NEXT:    call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK2-NEXT:    [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8
 // CHECK2-NEXT:    [[TMP7:%.*]] = load i64, i64* [[I]], align 8
@@ -867,6 +920,7 @@
 // CHECK2-NEXT:    [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
 // CHECK2-NEXT:    [[ADD7:%.*]] = add i64 [[TMP17]], 1
 // CHECK2-NEXT:    store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]])
 // CHECK2-NEXT:    call void @__kmpc_dispatch_fini_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK2:       omp.inner.for.end:
@@ -874,7 +928,9 @@
 // CHECK2:       omp.dispatch.inc:
 // CHECK2-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK2:       omp.dispatch.end:
-// CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK2-IRBUILDER-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK2-NORMAL-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
 // CHECK2-NEXT:    ret void
 //
 //
@@ -900,7 +956,7 @@
 // CHECK2-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[I8:%.*]] = alloca i8, align 1
 // CHECK2-NEXT:    [[X9:%.*]] = alloca i32, align 4
-// CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK2-NORMAL-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK2-NEXT:    store float* [[A]], float** [[A_ADDR]], align 8
 // CHECK2-NEXT:    store float* [[B]], float** [[B_ADDR]], align 8
 // CHECK2-NEXT:    store float* [[C]], float** [[C_ADDR]], align 8
@@ -933,9 +989,11 @@
 // CHECK2-NEXT:    store i64 1, i64* [[DOTOMP_STRIDE]], align 8
 // CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
 // CHECK2-NEXT:    [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]])
 // CHECK2-NEXT:    call void @__kmpc_dispatch_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1073741894, i64 0, i64 [[TMP6]], i64 1, i64 1)
 // CHECK2-NEXT:    br label [[OMP_DISPATCH_COND:%.*]]
 // CHECK2:       omp.dispatch.cond:
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]])
 // CHECK2-NEXT:    [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]])
 // CHECK2-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK2-NEXT:    br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -966,6 +1024,7 @@
 // CHECK2-NEXT:    [[SUB20:%.*]] = sub nsw i64 11, [[MUL19]]
 // CHECK2-NEXT:    [[CONV21:%.*]] = trunc i64 [[SUB20]] to i32
 // CHECK2-NEXT:    store i32 [[CONV21]], i32* [[X9]], align 4
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK2-NEXT:    call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK2-NEXT:    [[TMP15:%.*]] = load float*, float** [[B_ADDR]], align 8
 // CHECK2-NEXT:    [[TMP16:%.*]] = load i8, i8* [[I8]], align 1
@@ -997,6 +1056,7 @@
 // CHECK2-NEXT:    [[TMP26:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
 // CHECK2-NEXT:    [[ADD30:%.*]] = add nsw i64 [[TMP26]], 1
 // CHECK2-NEXT:    store i64 [[ADD30]], i64* [[DOTOMP_IV]], align 8
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]])
 // CHECK2-NEXT:    call void @__kmpc_dispatch_fini_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK2:       omp.inner.for.end:
@@ -1006,7 +1066,9 @@
 // CHECK2:       omp.dispatch.end:
 // CHECK2-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK2:       omp.precond.end:
-// CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK2-IRBUILDER-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK2-NORMAL-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
 // CHECK2-NEXT:    ret void
 //
 //
@@ -1027,7 +1089,7 @@
 // CHECK2-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[I:%.*]] = alloca i8, align 1
 // CHECK2-NEXT:    [[X2:%.*]] = alloca i32, align 4
-// CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK2-NORMAL-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK2-NEXT:    store float* [[A]], float** [[A_ADDR]], align 8
 // CHECK2-NEXT:    store float* [[B]], float** [[B_ADDR]], align 8
 // CHECK2-NEXT:    store float* [[C]], float** [[C_ADDR]], align 8
@@ -1037,9 +1099,11 @@
 // CHECK2-NEXT:    store i32 199, i32* [[DOTOMP_UB]], align 4
 // CHECK2-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
 // CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]])
 // CHECK2-NEXT:    call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1073741893, i32 0, i32 199, i32 1, i32 1)
 // CHECK2-NEXT:    br label [[OMP_DISPATCH_COND:%.*]]
 // CHECK2:       omp.dispatch.cond:
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
 // CHECK2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
 // CHECK2-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
 // CHECK2-NEXT:    br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -1067,6 +1131,7 @@
 // CHECK2-NEXT:    [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1
 // CHECK2-NEXT:    [[ADD6:%.*]] = add nsw i32 -10, [[MUL5]]
 // CHECK2-NEXT:    store i32 [[ADD6]], i32* [[X2]], align 4
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK2-NEXT:    call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK2-NEXT:    [[TMP8:%.*]] = load float*, float** [[B_ADDR]], align 8
 // CHECK2-NEXT:    [[TMP9:%.*]] = load i8, i8* [[I]], align 1
@@ -1098,6 +1163,7 @@
 // CHECK2-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
 // CHECK2-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP19]], 1
 // CHECK2-NEXT:    store i32 [[ADD15]], i32* [[DOTOMP_IV]], align 4
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
 // CHECK2-NEXT:    call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK2:       omp.inner.for.end:
@@ -1105,7 +1171,9 @@
 // CHECK2:       omp.dispatch.inc:
 // CHECK2-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK2:       omp.dispatch.end:
-// CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK2-IRBUILDER-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK2-NORMAL-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
 // CHECK2-NEXT:    ret void
 //
 //
@@ -1132,7 +1200,7 @@
 // CHECK2-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[I28:%.*]] = alloca i32, align 4
-// CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK2-NORMAL-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK2-NEXT:    store i32 [[LOW]], i32* [[LOW_ADDR]], align 4
 // CHECK2-NEXT:    store i32 [[UP]], i32* [[UP_ADDR]], align 4
 // CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[LOW_ADDR]], align 4
@@ -1219,9 +1287,11 @@
 // CHECK2-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
 // CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
 // CHECK2-NEXT:    [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_20]], align 4
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12:[0-9]+]])
 // CHECK2-NEXT:    call void @__kmpc_dispatch_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 [[TMP25]], i32 1, i32 1)
 // CHECK2-NEXT:    br label [[OMP_DISPATCH_COND:%.*]]
 // CHECK2:       omp.dispatch.cond:
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]])
 // CHECK2-NEXT:    [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
 // CHECK2-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0
 // CHECK2-NEXT:    br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -1229,13 +1299,15 @@
 // CHECK2-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
 // CHECK2-NEXT:    store i32 [[TMP27]], i32* [[DOTOMP_IV16]], align 4
 // CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND29:%.*]]
-// CHECK2:       omp.inner.for.cond29:
+// CHECK2-IRBUILDER:       omp.inner.for.cond30:
+// CHECK2-NORMAL:       omp.inner.for.cond29:
 // CHECK2-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
 // CHECK2-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7
 // CHECK2-NEXT:    [[ADD30:%.*]] = add i32 [[TMP29]], 1
 // CHECK2-NEXT:    [[CMP31:%.*]] = icmp ult i32 [[TMP28]], [[ADD30]]
 // CHECK2-NEXT:    br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END40:%.*]]
-// CHECK2:       omp.inner.for.body32:
+// CHECK2-IRBUILDER:       omp.inner.for.body33:
+// CHECK2-NORMAL:       omp.inner.for.body32:
 // CHECK2-NEXT:    [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group !7
 // CHECK2-NEXT:    [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
 // CHECK2-NEXT:    [[MUL33:%.*]] = mul i32 [[TMP31]], 1
@@ -1247,15 +1319,19 @@
 // CHECK2-NEXT:    store float 0.000000e+00, float* [[ARRAYIDX36]], align 4, !llvm.access.group !7
 // CHECK2-NEXT:    call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7
 // CHECK2-NEXT:    br label [[OMP_BODY_CONTINUE37:%.*]]
-// CHECK2:       omp.body.continue37:
+// CHECK2-IRBUILDER:       omp.body.continue38:
+// CHECK2-NORMAL:       omp.body.continue37:
 // CHECK2-NEXT:    br label [[OMP_INNER_FOR_INC38:%.*]]
-// CHECK2:       omp.inner.for.inc38:
+// CHECK2-IRBUILDER:       omp.inner.for.inc39:
+// CHECK2-NORMAL:       omp.inner.for.inc38:
 // CHECK2-NEXT:    [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
 // CHECK2-NEXT:    [[ADD39:%.*]] = add i32 [[TMP33]], 1
 // CHECK2-NEXT:    store i32 [[ADD39]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]])
 // CHECK2-NEXT:    call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]), !llvm.access.group !7
 // CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP8:![0-9]+]]
-// CHECK2:       omp.inner.for.end40:
+// CHECK2-IRBUILDER:       omp.inner.for.end42:
+// CHECK2-NORMAL:       omp.inner.for.end40:
 // CHECK2-NEXT:    br label [[OMP_DISPATCH_INC:%.*]]
 // CHECK2:       omp.dispatch.inc:
 // CHECK2-NEXT:    br label [[OMP_DISPATCH_COND]]
@@ -1278,7 +1354,9 @@
 // CHECK2:       .omp.final.done:
 // CHECK2-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK2:       omp.precond.end:
-// CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK2-IRBUILDER-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK2-NORMAL-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
 // CHECK2-NEXT:    ret void
 //
 //
@@ -1322,7 +1400,7 @@
 // CHECK3-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+// CHECK3-NORMAL-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
 // CHECK3-NEXT:    store float* [[A]], float** [[A_ADDR]], align 8
 // CHECK3-NEXT:    store float* [[B]], float** [[B_ADDR]], align 8
 // CHECK3-NEXT:    store float* [[C]], float** [[C_ADDR]], align 8
@@ -1331,9 +1409,12 @@
 // CHECK3-NEXT:    store i32 4571423, i32* [[DOTOMP_UB]], align 4
 // CHECK3-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
 // CHECK3-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
-// CHECK3-NEXT:    call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]])
+// CHECK3-IRBUILDER-NEXT:    call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
+// CHECK3-NORMAL-NEXT:    call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
 // CHECK3-NEXT:    br label [[OMP_DISPATCH_COND:%.*]]
 // CHECK3:       omp.dispatch.cond:
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]])
 // CHECK3-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
 // CHECK3-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
 // CHECK3-NEXT:    br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -1351,6 +1432,7 @@
 // CHECK3-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP5]], 7
 // CHECK3-NEXT:    [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]]
 // CHECK3-NEXT:    store i32 [[SUB]], i32* [[I]], align 4
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK3-NEXT:    call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK3-NEXT:    [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8
 // CHECK3-NEXT:    [[TMP7:%.*]] = load i32, i32* [[I]], align 4
@@ -1382,6 +1464,7 @@
 // CHECK3-NEXT:    [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
 // CHECK3-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP17]], 1
 // CHECK3-NEXT:    store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]])
 // CHECK3-NEXT:    call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK3:       omp.inner.for.end:
@@ -1389,7 +1472,9 @@
 // CHECK3:       omp.dispatch.inc:
 // CHECK3-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK3:       omp.dispatch.end:
-// CHECK3-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-IRBUILDER-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP0]])
+// CHECK3-NORMAL-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
 // CHECK3-NEXT:    ret void
 //
 //
@@ -1407,7 +1492,7 @@
 // CHECK3-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
 // CHECK3-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[I:%.*]] = alloca i64, align 8
-// CHECK3-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-NORMAL-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK3-NEXT:    store float* [[A]], float** [[A_ADDR]], align 8
 // CHECK3-NEXT:    store float* [[B]], float** [[B_ADDR]], align 8
 // CHECK3-NEXT:    store float* [[C]], float** [[C_ADDR]], align 8
@@ -1416,9 +1501,11 @@
 // CHECK3-NEXT:    store i64 16908287, i64* [[DOTOMP_UB]], align 8
 // CHECK3-NEXT:    store i64 1, i64* [[DOTOMP_STRIDE]], align 8
 // CHECK3-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]])
 // CHECK3-NEXT:    call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 67, i64 0, i64 16908287, i64 1, i64 1)
 // CHECK3-NEXT:    br label [[OMP_DISPATCH_COND:%.*]]
 // CHECK3:       omp.dispatch.cond:
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]])
 // CHECK3-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]])
 // CHECK3-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
 // CHECK3-NEXT:    br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -1437,6 +1524,7 @@
 // CHECK3-NEXT:    [[MUL:%.*]] = mul i64 [[TMP5]], 127
 // CHECK3-NEXT:    [[ADD1:%.*]] = add i64 131071, [[MUL]]
 // CHECK3-NEXT:    store i64 [[ADD1]], i64* [[I]], align 8
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK3-NEXT:    call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK3-NEXT:    [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8
 // CHECK3-NEXT:    [[TMP7:%.*]] = load i64, i64* [[I]], align 8
@@ -1464,6 +1552,7 @@
 // CHECK3-NEXT:    [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
 // CHECK3-NEXT:    [[ADD7:%.*]] = add i64 [[TMP17]], 1
 // CHECK3-NEXT:    store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]])
 // CHECK3-NEXT:    call void @__kmpc_dispatch_fini_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK3:       omp.inner.for.end:
@@ -1471,7 +1560,9 @@
 // CHECK3:       omp.dispatch.inc:
 // CHECK3-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK3:       omp.dispatch.end:
-// CHECK3-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-IRBUILDER-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK3-NORMAL-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
 // CHECK3-NEXT:    ret void
 //
 //
@@ -1497,7 +1588,7 @@
 // CHECK3-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[I8:%.*]] = alloca i8, align 1
 // CHECK3-NEXT:    [[X9:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-NORMAL-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK3-NEXT:    store float* [[A]], float** [[A_ADDR]], align 8
 // CHECK3-NEXT:    store float* [[B]], float** [[B_ADDR]], align 8
 // CHECK3-NEXT:    store float* [[C]], float** [[C_ADDR]], align 8
@@ -1530,9 +1621,11 @@
 // CHECK3-NEXT:    store i64 1, i64* [[DOTOMP_STRIDE]], align 8
 // CHECK3-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]])
 // CHECK3-NEXT:    call void @__kmpc_dispatch_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 70, i64 0, i64 [[TMP6]], i64 1, i64 1)
 // CHECK3-NEXT:    br label [[OMP_DISPATCH_COND:%.*]]
 // CHECK3:       omp.dispatch.cond:
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]])
 // CHECK3-NEXT:    [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]])
 // CHECK3-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK3-NEXT:    br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -1563,6 +1656,7 @@
 // CHECK3-NEXT:    [[SUB20:%.*]] = sub nsw i64 11, [[MUL19]]
 // CHECK3-NEXT:    [[CONV21:%.*]] = trunc i64 [[SUB20]] to i32
 // CHECK3-NEXT:    store i32 [[CONV21]], i32* [[X9]], align 4
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK3-NEXT:    call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK3-NEXT:    [[TMP15:%.*]] = load float*, float** [[B_ADDR]], align 8
 // CHECK3-NEXT:    [[TMP16:%.*]] = load i8, i8* [[I8]], align 1
@@ -1594,6 +1688,7 @@
 // CHECK3-NEXT:    [[TMP26:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
 // CHECK3-NEXT:    [[ADD30:%.*]] = add nsw i64 [[TMP26]], 1
 // CHECK3-NEXT:    store i64 [[ADD30]], i64* [[DOTOMP_IV]], align 8
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]])
 // CHECK3-NEXT:    call void @__kmpc_dispatch_fini_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK3:       omp.inner.for.end:
@@ -1603,7 +1698,9 @@
 // CHECK3:       omp.dispatch.end:
 // CHECK3-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK3:       omp.precond.end:
-// CHECK3-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-IRBUILDER-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK3-NORMAL-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
 // CHECK3-NEXT:    ret void
 //
 //
@@ -1624,7 +1721,7 @@
 // CHECK3-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[I:%.*]] = alloca i8, align 1
 // CHECK3-NEXT:    [[X2:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-NORMAL-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK3-NEXT:    store float* [[A]], float** [[A_ADDR]], align 8
 // CHECK3-NEXT:    store float* [[B]], float** [[B_ADDR]], align 8
 // CHECK3-NEXT:    store float* [[C]], float** [[C_ADDR]], align 8
@@ -1634,9 +1731,11 @@
 // CHECK3-NEXT:    store i32 199, i32* [[DOTOMP_UB]], align 4
 // CHECK3-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
 // CHECK3-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]])
 // CHECK3-NEXT:    call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 69, i32 0, i32 199, i32 1, i32 1)
 // CHECK3-NEXT:    br label [[OMP_DISPATCH_COND:%.*]]
 // CHECK3:       omp.dispatch.cond:
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
 // CHECK3-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
 // CHECK3-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
 // CHECK3-NEXT:    br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -1664,6 +1763,7 @@
 // CHECK3-NEXT:    [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1
 // CHECK3-NEXT:    [[ADD6:%.*]] = add nsw i32 -10, [[MUL5]]
 // CHECK3-NEXT:    store i32 [[ADD6]], i32* [[X2]], align 4
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK3-NEXT:    call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK3-NEXT:    [[TMP8:%.*]] = load float*, float** [[B_ADDR]], align 8
 // CHECK3-NEXT:    [[TMP9:%.*]] = load i8, i8* [[I]], align 1
@@ -1695,6 +1795,7 @@
 // CHECK3-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
 // CHECK3-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP19]], 1
 // CHECK3-NEXT:    store i32 [[ADD15]], i32* [[DOTOMP_IV]], align 4
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
 // CHECK3-NEXT:    call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK3:       omp.inner.for.end:
@@ -1702,7 +1803,9 @@
 // CHECK3:       omp.dispatch.inc:
 // CHECK3-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK3:       omp.dispatch.end:
-// CHECK3-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-IRBUILDER-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK3-NORMAL-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
 // CHECK3-NEXT:    ret void
 //
 //
@@ -1729,7 +1832,7 @@
 // CHECK3-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[I28:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-NORMAL-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK3-NEXT:    store i32 [[LOW]], i32* [[LOW_ADDR]], align 4
 // CHECK3-NEXT:    store i32 [[UP]], i32* [[UP_ADDR]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = load i32, i32* [[LOW_ADDR]], align 4
@@ -1816,9 +1919,11 @@
 // CHECK3-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
 // CHECK3-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_20]], align 4
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12:[0-9]+]])
 // CHECK3-NEXT:    call void @__kmpc_dispatch_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 [[TMP25]], i32 1, i32 1)
 // CHECK3-NEXT:    br label [[OMP_DISPATCH_COND:%.*]]
 // CHECK3:       omp.dispatch.cond:
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]])
 // CHECK3-NEXT:    [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
 // CHECK3-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0
 // CHECK3-NEXT:    br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -1826,13 +1931,15 @@
 // CHECK3-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
 // CHECK3-NEXT:    store i32 [[TMP27]], i32* [[DOTOMP_IV16]], align 4
 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND29:%.*]]
-// CHECK3:       omp.inner.for.cond29:
+// CHECK3-IRBUILDER:       omp.inner.for.cond30:
+// CHECK3-NORMAL:       omp.inner.for.cond29:
 // CHECK3-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
 // CHECK3-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7
 // CHECK3-NEXT:    [[ADD30:%.*]] = add i32 [[TMP29]], 1
 // CHECK3-NEXT:    [[CMP31:%.*]] = icmp ult i32 [[TMP28]], [[ADD30]]
 // CHECK3-NEXT:    br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END40:%.*]]
-// CHECK3:       omp.inner.for.body32:
+// CHECK3-IRBUILDER:       omp.inner.for.body33:
+// CHECK3-NORMAL:       omp.inner.for.body32:
 // CHECK3-NEXT:    [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group !7
 // CHECK3-NEXT:    [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
 // CHECK3-NEXT:    [[MUL33:%.*]] = mul i32 [[TMP31]], 1
@@ -1844,15 +1951,19 @@
 // CHECK3-NEXT:    store float 0.000000e+00, float* [[ARRAYIDX36]], align 4, !llvm.access.group !7
 // CHECK3-NEXT:    call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7
 // CHECK3-NEXT:    br label [[OMP_BODY_CONTINUE37:%.*]]
-// CHECK3:       omp.body.continue37:
+// CHECK3-IRBUILDER:       omp.body.continue38:
+// CHECK3-NORMAL:       omp.body.continue37:
 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_INC38:%.*]]
-// CHECK3:       omp.inner.for.inc38:
+// CHECK3-IRBUILDER:       omp.inner.for.inc39:
+// CHECK3-NORMAL:       omp.inner.for.inc38:
 // CHECK3-NEXT:    [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
 // CHECK3-NEXT:    [[ADD39:%.*]] = add i32 [[TMP33]], 1
 // CHECK3-NEXT:    store i32 [[ADD39]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]])
 // CHECK3-NEXT:    call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]), !llvm.access.group !7
 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP8:![0-9]+]]
-// CHECK3:       omp.inner.for.end40:
+// CHECK3-IRBUILDER:       omp.inner.for.end42:
+// CHECK3-NORMAL:       omp.inner.for.end40:
 // CHECK3-NEXT:    br label [[OMP_DISPATCH_INC:%.*]]
 // CHECK3:       omp.dispatch.inc:
 // CHECK3-NEXT:    br label [[OMP_DISPATCH_COND]]
@@ -1875,7 +1986,9 @@
 // CHECK3:       .omp.final.done:
 // CHECK3-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK3:       omp.precond.end:
-// CHECK3-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-IRBUILDER-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK3-NORMAL-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
 // CHECK3-NEXT:    ret void
 //
 //
@@ -1919,7 +2032,7 @@
 // CHECK4-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[I:%.*]] = alloca i32, align 4
= alloca i32, align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK4-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK4-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK4-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK4-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 @@ -1928,9 +2041,12 @@ // CHECK4-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1) +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +// CHECK4-IRBUILDER-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1) +// CHECK4-NORMAL-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1) // CHECK4-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK4: omp.dispatch.cond: +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) // CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) // CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -1948,6 +2064,7 @@ // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP5]], 7 // CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK4-NEXT: store i32 [[SUB]], i32* [[I]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8 // CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[I]], align 4 @@ -1979,6 +2096,7 @@ // CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) // CHECK4-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: @@ -1986,7 +2104,9 @@ // CHECK4: omp.dispatch.inc: // CHECK4-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK4: omp.dispatch.end: -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]]) +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP0]]) +// CHECK4-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]]) // CHECK4-NEXT: ret void // // @@ -2004,7 +2124,7 @@ // CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK4-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK4-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 @@ -2013,9 +2133,11 @@ // CHECK4-NEXT: store i64 16908287, i64* [[DOTOMP_UB]], align 8 // CHECK4-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) // CHECK4-NEXT: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 67, i64 0, i64 16908287, i64 1, i64 1) // CHECK4-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK4: omp.dispatch.cond: +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]]) // CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) // CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -2034,6 +2156,7 @@ // CHECK4-NEXT: [[MUL:%.*]] = mul i64 [[TMP5]], 127 // CHECK4-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]] // CHECK4-NEXT: store i64 [[ADD1]], i64* [[I]], align 8 +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8 // CHECK4-NEXT: [[TMP7:%.*]] = load i64, i64* [[I]], align 8 @@ -2061,6 +2184,7 @@ // CHECK4-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 // CHECK4-NEXT: [[ADD7:%.*]] = add i64 [[TMP17]], 1 // CHECK4-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8 +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]]) // CHECK4-NEXT: call void @__kmpc_dispatch_fini_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: @@ -2068,7 +2192,9 @@ // CHECK4: omp.dispatch.inc: // CHECK4-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK4: omp.dispatch.end: -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]]) +// CHECK4-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK4-NEXT: ret void // // @@ -2094,7 +2220,7 @@ // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I8:%.*]] = alloca i8, align 1 // CHECK4-NEXT: [[X9:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK4-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK4-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 @@ -2127,9 +2253,11 @@ // CHECK4-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK4-NEXT: 
store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK4-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) // CHECK4-NEXT: call void @__kmpc_dispatch_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 70, i64 0, i64 [[TMP6]], i64 1, i64 1) // CHECK4-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK4: omp.dispatch.cond: +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]]) // CHECK4-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) // CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -2160,6 +2288,7 @@ // CHECK4-NEXT: [[SUB20:%.*]] = sub nsw i64 11, [[MUL19]] // CHECK4-NEXT: [[CONV21:%.*]] = trunc i64 [[SUB20]] to i32 // CHECK4-NEXT: store i32 [[CONV21]], i32* [[X9]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: [[TMP15:%.*]] = load float*, float** [[B_ADDR]], align 8 // CHECK4-NEXT: [[TMP16:%.*]] = load i8, i8* [[I8]], align 1 @@ -2191,6 +2320,7 @@ // CHECK4-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 // CHECK4-NEXT: [[ADD30:%.*]] = add nsw i64 [[TMP26]], 1 // CHECK4-NEXT: store i64 [[ADD30]], i64* [[DOTOMP_IV]], align 8 +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]]) // CHECK4-NEXT: call void @__kmpc_dispatch_fini_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: @@ -2200,7 +2330,9 @@ // CHECK4: omp.dispatch.end: // CHECK4-NEXT: br label [[OMP_PRECOND_END]] // CHECK4: omp.precond.end: -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]]) +// CHECK4-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK4-NEXT: ret void // // @@ -2221,7 +2353,7 @@ // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i8, align 1 // CHECK4-NEXT: [[X2:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK4-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK4-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 @@ -2231,9 +2363,11 @@ // CHECK4-NEXT: store i32 199, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) // CHECK4-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 69, i32 0, i32 199, i32 1, i32 1) // CHECK4-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK4: 
omp.dispatch.cond: +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) // CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) // CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -2261,6 +2395,7 @@ // CHECK4-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 -10, [[MUL5]] // CHECK4-NEXT: store i32 [[ADD6]], i32* [[X2]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: [[TMP8:%.*]] = load float*, float** [[B_ADDR]], align 8 // CHECK4-NEXT: [[TMP9:%.*]] = load i8, i8* [[I]], align 1 @@ -2292,6 +2427,7 @@ // CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK4-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) // CHECK4-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: @@ -2299,7 +2435,9 @@ // CHECK4: omp.dispatch.inc: // CHECK4-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK4: omp.dispatch.end: -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]]) +// CHECK4-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK4-NEXT: ret void // // @@ -2326,7 +2464,7 @@ // CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I28:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: store i32 [[LOW]], i32* [[LOW_ADDR]], align 4 // CHECK4-NEXT: store i32 [[UP]], i32* [[UP_ADDR]], align 4 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[LOW_ADDR]], align 4 @@ -2413,9 +2551,11 @@ // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_20]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12:[0-9]+]]) // CHECK4-NEXT: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 [[TMP25]], i32 1, i32 1) // CHECK4-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK4: omp.dispatch.cond: +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]]) // CHECK4-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) // CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK4-NEXT: br i1 
[[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -2423,13 +2563,15 @@ // CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV16]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND29:%.*]] -// CHECK4: omp.inner.for.cond29: +// CHECK4-IRBUILDER: omp.inner.for.cond30: +// CHECK4-NORMAL: omp.inner.for.cond29: // CHECK4-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 // CHECK4-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7 // CHECK4-NEXT: [[ADD30:%.*]] = add i32 [[TMP29]], 1 // CHECK4-NEXT: [[CMP31:%.*]] = icmp ult i32 [[TMP28]], [[ADD30]] // CHECK4-NEXT: br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END40:%.*]] -// CHECK4: omp.inner.for.body32: +// CHECK4-IRBUILDER: omp.inner.for.body33: +// CHECK4-NORMAL: omp.inner.for.body32: // CHECK4-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group !7 // CHECK4-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 // CHECK4-NEXT: [[MUL33:%.*]] = mul i32 [[TMP31]], 1 @@ -2441,15 +2583,19 @@ // CHECK4-NEXT: store float 0.000000e+00, float* [[ARRAYIDX36]], align 4, !llvm.access.group !7 // CHECK4-NEXT: call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE37:%.*]] -// CHECK4: omp.body.continue37: +// CHECK4-IRBUILDER: omp.body.continue38: +// CHECK4-NORMAL: omp.body.continue37: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC38:%.*]] -// CHECK4: omp.inner.for.inc38: +// CHECK4-IRBUILDER: omp.inner.for.inc39: +// CHECK4-NORMAL: omp.inner.for.inc38: // CHECK4-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 // CHECK4-NEXT: [[ADD39:%.*]] = add i32 [[TMP33]], 1 // CHECK4-NEXT: store i32 [[ADD39]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]]) // CHECK4-NEXT: call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]), !llvm.access.group !7 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP8:![0-9]+]] -// CHECK4: omp.inner.for.end40: +// CHECK4-IRBUILDER: omp.inner.for.end42: +// CHECK4-NORMAL: omp.inner.for.end40: // CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK4: omp.dispatch.inc: // CHECK4-NEXT: br label [[OMP_DISPATCH_COND]] @@ -2472,7 +2618,9 @@ // CHECK4: .omp.final.done: // CHECK4-NEXT: br label [[OMP_PRECOND_END]] // CHECK4: omp.precond.end: -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]]) +// CHECK4-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK4-NEXT: ret void // // diff --git a/clang/test/OpenMP/ordered_doacross_codegen.c b/clang/test/OpenMP/ordered_doacross_codegen.c --- a/clang/test/OpenMP/ordered_doacross_codegen.c +++ b/clang/test/OpenMP/ordered_doacross_codegen.c @@ -1,6 +1,10 @@ -// RUN: %clang_cc1 -verify -fopenmp -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NORMAL // RUN: %clang_cc1 -fopenmp -triple 
x86_64-unknown-unknown -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -triple x86_64-unknown-unknown -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -triple x86_64-unknown-unknown -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NORMAL + +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-IRBUILDER +// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -triple x86_64-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -triple x86_64-unknown-unknown -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-IRBUILDER // RUN: %clang_cc1 -verify -fopenmp-simd -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -fopenmp-simd -triple x86_64-unknown-unknown -emit-pch -o %t %s @@ -20,7 +24,7 @@ int main() { int i; // CHECK: [[DIMS:%.+]] = alloca [1 x [[KMP_DIM]]], -// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT:%.+]]) +// CHECK-NORMAL: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT:%.+]]) // CHECK: icmp // CHECK-NEXT: br i1 % // CHECK: [[CAST:%.+]] = bitcast [1 x [[KMP_DIM]]]* [[DIMS]] to i8* @@ -32,13 +36,13 @@ // CHECK: store i64 1, i64* % // CHECK: [[DIM:%.+]] = getelementptr inbounds [1 x [[KMP_DIM]]], [1 x [[KMP_DIM]]]* [[DIMS]], i64 0, i64 0 // CHECK: [[CAST:%.+]] = bitcast [[KMP_DIM]]* [[DIM]] to i8* -// CHECK: call void @__kmpc_doacross_init([[IDENT]], i32 [[GTID]], i32 1, i8* [[CAST]]) -// CHECK: call void @__kmpc_for_static_init_4(%struct.ident_t* @{{.+}}, i32 [[GTID]], i32 33, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1) +// CHECK-NORMAL: call void @__kmpc_doacross_init([[IDENT]], i32 [[GTID]], i32 1, i8* [[CAST]]) +// CHECK-NORMAL: call void @__kmpc_for_static_init_4(%struct.ident_t* @{{.+}}, i32 [[GTID]], i32 33, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1) #pragma omp for ordered(1) for (i = 0; i < n; ++i) { a[i] = b[i] + 1; foo(); -// CHECK: call void [[FOO:.+]]( +// CHECK: call void (...) [[FOO:.+]]( // CHECK: load i32, i32* [[I:%.+]], // CHECK-NEXT: sub nsw i32 %{{.+}}, 0 // CHECK-NEXT: sdiv i32 %{{.+}}, 1 @@ -46,11 +50,13 @@ // CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [1 x i64], [1 x i64]* [[CNT:%.+]], i64 0, i64 0 // CHECK-NEXT: store i64 %{{.+}}, i64* [[TMP]], // CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [1 x i64], [1 x i64]* [[CNT]], i64 0, i64 0 -// CHECK-NEXT: call void @__kmpc_doacross_post([[IDENT]], i32 [[GTID]], i64* [[TMP]]) +// CHECK-NORMAL-NEXT: call void @__kmpc_doacross_post([[IDENT]], i32 [[GTID]], i64* [[TMP]]) +// CHECK-IRBUILDER-NEXT: [[GTID1:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT:%.+]]) +// CHECK-IRBUILDER-NEXT: call void @__kmpc_doacross_post([[IDENT]], i32 [[GTID1]], i64* [[TMP]]) #pragma omp ordered depend(source) c[i] = c[i] + 1; foo(); -// CHECK: call void [[FOO]] +// CHECK: call void (...) 
[[FOO]] // CHECK: load i32, i32* [[I]], // CHECK-NEXT: sub nsw i32 %{{.+}}, 2 // CHECK-NEXT: sub nsw i32 %{{.+}}, 0 @@ -59,12 +65,14 @@ // CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [1 x i64], [1 x i64]* [[CNT:%.+]], i64 0, i64 0 // CHECK-NEXT: store i64 %{{.+}}, i64* [[TMP]], // CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [1 x i64], [1 x i64]* [[CNT]], i64 0, i64 0 -// CHECK-NEXT: call void @__kmpc_doacross_wait([[IDENT]], i32 [[GTID]], i64* [[TMP]]) +// CHECK-NORMAL-NEXT: call void @__kmpc_doacross_wait([[IDENT]], i32 [[GTID]], i64* [[TMP]]) +// CHECK-IRBUILDER-NEXT: [[GTID2:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT:%.+]]) +// CHECK-IRBUILDER-NEXT: call void @__kmpc_doacross_wait([[IDENT]], i32 [[GTID2]], i64* [[TMP]]) #pragma omp ordered depend(sink : i - 2) d[i] = a[i - 2]; } // CHECK: call void @__kmpc_for_static_fini( - // CHECK: call void @__kmpc_doacross_fini([[IDENT]], i32 [[GTID]]) + // CHECK-NORMAL: call void @__kmpc_doacross_fini([[IDENT]], i32 [[GTID]]) // CHECK: ret i32 0 return 0; } diff --git a/clang/test/OpenMP/ordered_doacross_codegen.cpp b/clang/test/OpenMP/ordered_doacross_codegen.cpp --- a/clang/test/OpenMP/ordered_doacross_codegen.cpp +++ b/clang/test/OpenMP/ordered_doacross_codegen.cpp @@ -1,6 +1,10 @@ -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NORMAL // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NORMAL + +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=CHECK,CHECK-IRBUILDER +// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-IRBUILDER // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s @@ -37,7 +41,7 @@ int main() { int i; // CHECK: [[DIMS:%.+]] = alloca [1 x [[KMP_DIM]]], -// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT:%.+]]) +// CHECK-NORMAL: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT:%.+]]) // CHECK: icmp // CHECK-NEXT: br i1 % // CHECK: [[CAST:%.+]] = bitcast [1 x [[KMP_DIM]]]* [[DIMS]] to i8* @@ -49,8 +53,8 @@ // CHECK: store i64 1, i64* % // CHECK: [[DIM:%.+]] = getelementptr inbounds [1 x [[KMP_DIM]]], [1 x [[KMP_DIM]]]* [[DIMS]], i64 0, i64 0 // CHECK: [[CAST:%.+]] = bitcast [[KMP_DIM]]* [[DIM]] to i8* -// CHECK: call void 
@__kmpc_doacross_init([[IDENT]], i32 [[GTID]], i32 1, i8* [[CAST]]) -// CHECK: call void @__kmpc_for_static_init_4(%struct.ident_t* @{{.+}}, i32 [[GTID]], i32 33, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1) +// CHECK-NORMAL: call void @__kmpc_doacross_init([[IDENT]], i32 [[GTID]], i32 1, i8* [[CAST]]) +// CHECK-NORMAL: call void @__kmpc_for_static_init_4(%struct.ident_t* @{{.+}}, i32 [[GTID]], i32 33, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1) #pragma omp for ordered(1) for (int i = 0; i < n; ++i) { a[i] = b[i] + 1; @@ -63,7 +67,9 @@ // CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [1 x i64], [1 x i64]* [[CNT:%.+]], i64 0, i64 0 // CHECK-NEXT: store i64 %{{.+}}, i64* [[TMP]], // CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [1 x i64], [1 x i64]* [[CNT]], i64 0, i64 0 -// CHECK-NEXT: call void @__kmpc_doacross_post([[IDENT]], i32 [[GTID]], i64* [[TMP]]) +// CHECK-NORMAL-NEXT: call void @__kmpc_doacross_post([[IDENT]], i32 [[GTID]], i64* [[TMP]]) +// CHECK-IRBUILDER-NEXT: [[GTID18:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT:%.+]]) +// CHECK-IRBUILDER-NEXT: call void @__kmpc_doacross_post([[IDENT]], i32 [[GTID18]], i64* [[TMP]]) #pragma omp ordered depend(source) c[i] = c[i] + 1; foo(); @@ -76,16 +82,18 @@ // CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [1 x i64], [1 x i64]* [[CNT:%.+]], i64 0, i64 0 // CHECK-NEXT: store i64 %{{.+}}, i64* [[TMP]], // CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [1 x i64], [1 x i64]* [[CNT]], i64 0, i64 0 -// CHECK-NEXT: call void @__kmpc_doacross_wait([[IDENT]], i32 [[GTID]], i64* [[TMP]]) +// CHECK-NORMAL-NEXT: call void @__kmpc_doacross_wait([[IDENT]], i32 [[GTID]], i64* [[TMP]]) +// CHECK-IRBUILDER-NEXT: [[GTID30:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT:%.+]]) +// CHECK-IRBUILDER-NEXT: call void @__kmpc_doacross_wait([[IDENT]], i32 [[GTID30]], i64* [[TMP]]) #pragma omp ordered depend(sink : i - 2) d[i] = a[i - 2]; } // CHECK: landingpad - // CHECK: call void @__kmpc_doacross_fini([[IDENT]], i32 [[GTID]]) + // CHECK-NORMAL: call void @__kmpc_doacross_fini([[IDENT]], i32 [[GTID]]) // CHECK: br label % // CHECK: call void @__kmpc_for_static_fini( - // CHECK: call void @__kmpc_doacross_fini([[IDENT]], i32 [[GTID]]) + // CHECK-NORMAL: call void @__kmpc_doacross_fini([[IDENT]], i32 [[GTID]]) // CHECK: ret i32 0 return 0; } @@ -93,7 +101,7 @@ // CHECK-LABEL: main1 int main1() { // CHECK: [[DIMS:%.+]] = alloca [1 x [[KMP_DIM]]], -// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT:%.+]]) +// CHECK-NORMAL: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT:%.+]]) // CHECK: icmp // CHECK-NEXT: br i1 % // CHECK: [[CAST:%.+]] = bitcast [1 x [[KMP_DIM]]]* [[DIMS]] to i8* @@ -105,8 +113,8 @@ // CHECK: store i64 1, i64* % // CHECK: [[DIM:%.+]] = getelementptr inbounds [1 x [[KMP_DIM]]], [1 x [[KMP_DIM]]]* [[DIMS]], i64 0, i64 0 // CHECK: [[CAST:%.+]] = bitcast [[KMP_DIM]]* [[DIM]] to i8* -// CHECK: call void @__kmpc_doacross_init([[IDENT]], i32 [[GTID]], i32 1, i8* [[CAST]]) -// CHECK: call void @__kmpc_for_static_init_4(%struct.ident_t* @{{.+}}, i32 [[GTID]], i32 33, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1) +// CHECK-NORMAL: call void @__kmpc_doacross_init([[IDENT]], i32 [[GTID]], i32 1, i8* [[CAST]]) +// CHECK-NORMAL: call void @__kmpc_for_static_init_4(%struct.ident_t* @{{.+}}, i32 [[GTID]], i32 33, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1) #pragma omp for ordered(1) for (int i = n; i > 0; --i) { a[i] = 
b[i] + 1; @@ -120,7 +128,9 @@ // CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [1 x i64], [1 x i64]* [[CNT:%.+]], i64 0, i64 0 // CHECK-NEXT: store i64 %{{.+}}, i64* [[TMP]], // CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [1 x i64], [1 x i64]* [[CNT]], i64 0, i64 0 -// CHECK-NEXT: call void @__kmpc_doacross_post([[IDENT]], i32 [[GTID]], i64* [[TMP]]) +// CHECK-NORMAL-NEXT: call void @__kmpc_doacross_post([[IDENT]], i32 [[GTID]], i64* [[TMP]]) +// CHECK-IRBUILDER-NEXT: [[GTID17:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT:%.+]]) +// CHECK-IRBUILDER-NEXT: call void @__kmpc_doacross_post([[IDENT]], i32 [[GTID17]], i64* [[TMP]]) #pragma omp ordered depend(source) c[i] = c[i] + 1; foo(); @@ -134,16 +144,18 @@ // CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [1 x i64], [1 x i64]* [[CNT:%.+]], i64 0, i64 0 // CHECK-NEXT: store i64 %{{.+}}, i64* [[TMP]], // CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [1 x i64], [1 x i64]* [[CNT]], i64 0, i64 0 -// CHECK-NEXT: call void @__kmpc_doacross_wait([[IDENT]], i32 [[GTID]], i64* [[TMP]]) +// CHECK-NORMAL-NEXT: call void @__kmpc_doacross_wait([[IDENT]], i32 [[GTID]], i64* [[TMP]]) +// CHECK-IRBUILDER-NEXT: [[GTID29:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT:%.+]]) +// CHECK-IRBUILDER-NEXT: call void @__kmpc_doacross_wait([[IDENT]], i32 [[GTID29]], i64* [[TMP]]) #pragma omp ordered depend(sink : i - 2) d[i] = a[i - 2]; } // CHECK: landingpad - // CHECK: call void @__kmpc_doacross_fini([[IDENT]], i32 [[GTID]]) + // CHECK-NORMAL: call void @__kmpc_doacross_fini([[IDENT]], i32 [[GTID]]) // CHECK: br label % // CHECK: call void @__kmpc_for_static_fini( - // CHECK: call void @__kmpc_doacross_fini([[IDENT]], i32 [[GTID]]) + // CHECK-NORMAL: call void @__kmpc_doacross_fini([[IDENT]], i32 [[GTID]]) // CHECK: ret i32 0 return 0; } @@ -161,7 +173,7 @@ void baz(T, T); TestStruct() { // CHECK: [[DIMS:%.+]] = alloca [2 x [[KMP_DIM]]], -// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT:%.+]]) +// CHECK-NORMAL: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT:%.+]]) // CHECK: [[CAST:%.+]] = bitcast [2 x [[KMP_DIM]]]* [[DIMS]] to i8* // CHECK: call void @llvm.memset.p0i8.i64(i8* align 8 [[CAST]], i8 0, i64 48, i1 false) // CHECK: [[DIM:%.+]] = getelementptr inbounds [2 x [[KMP_DIM]]], [2 x [[KMP_DIM]]]* [[DIMS]], i64 0, i64 0 @@ -176,8 +188,8 @@ // CHECK: store i64 1, i64* % // CHECK: [[DIM:%.+]] = getelementptr inbounds [2 x [[KMP_DIM]]], [2 x [[KMP_DIM]]]* [[DIMS]], i64 0, i64 0 // CHECK: [[CAST:%.+]] = bitcast [[KMP_DIM]]* [[DIM]] to i8* -// CHECK: call void @__kmpc_doacross_init([[IDENT]], i32 [[GTID]], i32 2, i8* [[CAST]]) -// CHECK: call void @__kmpc_for_static_init_4(%struct.ident_t* @{{.+}}, i32 [[GTID]], i32 33, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1) +// CHECK-NORMAL: call void @__kmpc_doacross_init([[IDENT]], i32 [[GTID]], i32 2, i8* [[CAST]]) +// CHECK-NORMAL: call void @__kmpc_for_static_init_4(%struct.ident_t* @{{.+}}, i32 [[GTID]], i32 33, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1) #pragma omp for ordered(2) for (T j = 0; j < M; j++) for (i = 0; i < n; i += 2) { @@ -190,34 +202,42 @@ // CHECK-NEXT: sub nsw i32 %{{.+}}, 0 // CHECK-NEXT: sdiv i32 %{{.+}}, 1 // CHECK-NEXT: sext i32 %{{.+}} to i64 -// CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[CNT:%.+]], i64 0, i64 0 -// CHECK-NEXT: store i64 %{{.+}}, i64* [[TMP:%.+]], +// CHECK-NORMAL-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[CNT:%.+]], i64 0, 
i64 0 +// CHECK-NORMAL-NEXT: store i64 %{{.+}}, i64* [[TMP:%.+]], // CHECK-NEXT: [[I:%.+]] = load i32*, i32** [[I_REF:%.+]], // CHECK-NEXT: load i32, i32* [[I]], // CHECK-NEXT: sub nsw i32 %{{.+}}, 2 // CHECK-NEXT: sub nsw i32 %{{.+}}, 0 // CHECK-NEXT: sdiv i32 %{{.+}}, 2 // CHECK-NEXT: sext i32 %{{.+}} to i64 +// CHECK-IRBUILDER-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[CNT:%.+]], i64 0, i64 0 +// CHECK-IRBUILDER-NEXT: store i64 %{{.+}}, i64* [[TMP:%.+]], // CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[CNT]], i64 0, i64 1 // CHECK-NEXT: store i64 %{{.+}}, i64* [[TMP]], // CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[CNT]], i64 0, i64 0 -// CHECK-NEXT: call void @__kmpc_doacross_wait([[IDENT]], i32 [[GTID]], i64* [[TMP]]) +// CHECK-NORMAL-NEXT: call void @__kmpc_doacross_wait([[IDENT]], i32 [[GTID]], i64* [[TMP]]) +// CHECK-IRBUILDER-NEXT: [[GTID18:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT:%.+]]) +// CHECK-IRBUILDER-NEXT: call void @__kmpc_doacross_wait([[IDENT]], i32 [[GTID18]], i64* [[TMP]]) // CHECK-NEXT: load i32, i32* [[J:%.+]], // CHECK-NEXT: sub nsw i32 %{{.+}}, 1 // CHECK-NEXT: sub nsw i32 %{{.+}}, 0 // CHECK-NEXT: sdiv i32 %{{.+}}, 1 // CHECK-NEXT: sext i32 %{{.+}} to i64 -// CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[CNT:%.+]], i64 0, i64 0 -// CHECK-NEXT: store i64 %{{.+}}, i64* [[TMP:%.+]], +// CHECK-NORMAL-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[CNT:%.+]], i64 0, i64 0 +// CHECK-NORMAL-NEXT: store i64 %{{.+}}, i64* [[TMP]], // CHECK-NEXT: [[I:%.+]] = load i32*, i32** [[I_REF]], // CHECK-NEXT: load i32, i32* [[I]], // CHECK-NEXT: sub nsw i32 %{{.+}}, 0 // CHECK-NEXT: sdiv i32 %{{.+}}, 2 // CHECK-NEXT: sext i32 %{{.+}} to i64 +// CHECK-IRBUILDER-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[CNT:%.+]], i64 0, i64 0 +// CHECK-IRBUILDER-NEXT: store i64 %{{.+}}, i64* [[TMP]], // CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[CNT]], i64 0, i64 1 // CHECK-NEXT: store i64 %{{.+}}, i64* [[TMP]], // CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[CNT]], i64 0, i64 0 -// CHECK-NEXT: call void @__kmpc_doacross_wait([[IDENT]], i32 [[GTID]], i64* [[TMP]]) +// CHECK-NORMAL-NEXT: call void @__kmpc_doacross_wait([[IDENT]], i32 [[GTID]], i64* [[TMP]]) +// CHECK-IRBUILDER-NEXT: [[GTID27:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT:%.+]]) +// CHECK-IRBUILDER-NEXT: call void @__kmpc_doacross_wait([[IDENT]], i32 [[GTID27]], i64* [[TMP]]) #pragma omp ordered depend(sink : j, i - 2) depend(sink : j - 1, i) b[i][j] = bar(a[i][j], b[i - 1][j], b[i][j - 1]); // CHECK: invoke {{.+TestStruct.+bar}} @@ -228,27 +248,31 @@ // CHECK-NEXT: sub nsw i32 %{{.+}}, 0 // CHECK-NEXT: sdiv i32 %{{.+}}, 1 // CHECK-NEXT: sext i32 %{{.+}} to i64 -// CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[CNT:%.+]], i64 0, i64 0 -// CHECK-NEXT: store i64 %{{.+}}, i64* [[TMP:%.+]], +// CHECK-NORMAL-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[CNT:%.+]], i64 0, i64 0 +// CHECK-NORMAL-NEXT: store i64 %{{.+}}, i64* [[TMP]], // CHECK-NEXT: [[I:%.+]] = load i32*, i32** [[I_REF]], // CHECK-NEXT: load i32, i32* [[I]], // CHECK-NEXT: sub nsw i32 %{{.+}}, 0 // CHECK-NEXT: sdiv i32 %{{.+}}, 2 // CHECK-NEXT: sext i32 %{{.+}} to i64 +// CHECK-IRBUILDER-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[CNT:%.+]], i64 0, i64 0 +// CHECK-IRBUILDER-NEXT: store i64 
%{{.+}}, i64* [[TMP]],
 // CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[CNT]], i64 0, i64 1
 // CHECK-NEXT: store i64 %{{.+}}, i64* [[TMP]],
 // CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[CNT]], i64 0, i64 0
-// CHECK-NEXT: call void @__kmpc_doacross_post([[IDENT]], i32 [[GTID]], i64* [[TMP]])
+// CHECK-NORMAL-NEXT: call void @__kmpc_doacross_post([[IDENT]], i32 [[GTID]], i64* [[TMP]])
+// CHECK-IRBUILDER-NEXT: [[GTID58:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT:%.+]])
+// CHECK-IRBUILDER-NEXT: call void @__kmpc_doacross_post([[IDENT]], i32 [[GTID58]], i64* [[TMP]])
 #pragma omp ordered depend(source)
       baz(a[i][j], b[i][j]);
     }
   }
   // CHECK: landingpad
-  // CHECK: call void @__kmpc_doacross_fini([[IDENT]], i32 [[GTID]])
+  // CHECK-NORMAL: call void @__kmpc_doacross_fini([[IDENT]], i32 [[GTID]])
   // CHECK: br label %
   // CHECK: call void @__kmpc_for_static_fini(
-  // CHECK: call void @__kmpc_doacross_fini([[IDENT]], i32 [[GTID]])
+  // CHECK-NORMAL: call void @__kmpc_doacross_fini([[IDENT]], i32 [[GTID]])
   // CHECK: ret
 };
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -838,6 +838,35 @@
                                      FinalizeCallbackTy FiniCB,
                                      StringRef CriticalName, Value *HintInst);

+  /// Generator for '#omp ordered depend (source | sink)'
+  ///
+  /// \param Loc The insert and source location description.
+  /// \param AllocaIP The insertion point to be used for alloca instructions.
+  /// \param NumLoops The number of loops in the depend clause.
+  /// \param StoreValues The loop-index values to be stored in the depend
+  /// vector.
+  /// \param Name The name of the alloca instruction.
+  /// \param IsDependSource If true, depend source; otherwise, depend sink.
+  ///
+  /// \returns The insertion position *after* the ordered directive.
+  InsertPointTy createOrderedDepend(const LocationDescription &Loc,
+                                    InsertPointTy AllocaIP, unsigned NumLoops,
+                                    ArrayRef<Value *> StoreValues,
+                                    const Twine &Name, bool IsDependSource);
+
+  /// Generator for '#omp ordered [threads | simd]'
+  ///
+  /// \param Loc The insert and source location description.
+  /// \param BodyGenCB Callback that will generate the region code.
+  /// \param FiniCB Callback to finalize variable copies.
+  /// \param IsThreads If true, with the threads clause or without any clause;
+  /// otherwise, with the simd clause.
+  ///
+  /// \returns The insertion position *after* the ordered directive.
+  InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc,
+                                         BodyGenCallbackTy BodyGenCB,
+                                         FinalizeCallbackTy FiniCB,
+                                         bool IsThreads);
+
   /// Generator for '#omp sections'
   ///
   /// \param Loc The insert and source location description.
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -2145,6 +2145,74 @@
                                /*Conditional*/ false, /*hasFinalize*/ true);
 }

+OpenMPIRBuilder::InsertPointTy
+OpenMPIRBuilder::createOrderedDepend(const LocationDescription &Loc,
+                                     InsertPointTy AllocaIP, unsigned NumLoops,
+                                     ArrayRef<Value *> StoreValues,
+                                     const Twine &Name, bool IsDependSource) {
+  if (!updateToLocation(Loc))
+    return Loc.IP;
+
+  // Allocate space for the depend vector and generate the alloca instruction.
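+  // Note: __kmpc_doacross_post/__kmpc_doacross_wait expect a vector of
+  // kmp_int64 loop indices, so the storage is a [NumLoops x i64] array
+  // hoisted to the AllocaIP rather than allocated at the current point.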
+  auto *ArrI64Ty = ArrayType::get(Int64, NumLoops);
+  Builder.restoreIP(AllocaIP);
+  AllocaInst *ArgsBase = Builder.CreateAlloca(ArrI64Ty, nullptr, Name);
+  ArgsBase->setAlignment(Align(8));
+  Builder.restoreIP(Loc.IP);
+
+  // Store the index values, one per loop, at their offsets in the depend
+  // vector.
+  for (unsigned I = 0; I < NumLoops; ++I) {
+    Value *DependAddrGEPIter = Builder.CreateInBoundsGEP(
+        ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(I)});
+    Builder.CreateStore(StoreValues[I], DependAddrGEPIter);
+  }
+
+  Value *DependBaseAddrGEP = Builder.CreateInBoundsGEP(
+      ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(0)});
+
+  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+  Value *Ident = getOrCreateIdent(SrcLocStr);
+  Value *ThreadId = getOrCreateThreadID(Ident);
+  Value *Args[] = {Ident, ThreadId, DependBaseAddrGEP};
+
+  Function *RTLFn = nullptr;
+  if (IsDependSource)
+    RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_post);
+  else
+    RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_wait);
+  Builder.CreateCall(RTLFn, Args);
+
+  return Builder.saveIP();
+}
+
+OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createOrderedThreadsSimd(
+    const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
+    FinalizeCallbackTy FiniCB, bool IsThreads) {
+  if (!updateToLocation(Loc))
+    return Loc.IP;
+
+  Directive OMPD = Directive::OMPD_ordered;
+  Instruction *EntryCall = nullptr;
+  Instruction *ExitCall = nullptr;
+
+  // Only 'ordered threads' needs the bracketing runtime calls; for
+  // 'ordered simd' no calls are emitted and the region is purely structural.
+  if (IsThreads) {
+    Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+    Value *Ident = getOrCreateIdent(SrcLocStr);
+    Value *ThreadId = getOrCreateThreadID(Ident);
+    Value *Args[] = {Ident, ThreadId};
+
+    Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_ordered);
+    EntryCall = Builder.CreateCall(EntryRTLFn, Args);
+
+    Function *ExitRTLFn =
+        getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_ordered);
+    ExitCall = Builder.CreateCall(ExitRTLFn, Args);
+  }
+
+  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
+                              /*Conditional*/ false, /*hasFinalize*/ true);
+}
+
 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion(
     Directive OMPD, Instruction *EntryCall, Instruction *ExitCall,
     BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional,
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -2120,6 +2120,320 @@
   EXPECT_EQ(CriticalEndCI->getArgOperand(2)->getType(), CriticalNamePtrTy);
 }

+TEST_F(OpenMPIRBuilderTest, OrderedDirectiveDependSource) {
+  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+  OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.initialize();
+  F->setName("func");
+  IRBuilder<> Builder(BB);
+  LLVMContext &Ctx = M->getContext();
+
+  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
+
+  InsertPointTy AllocaIP(&F->getEntryBlock(),
+                         F->getEntryBlock().getFirstInsertionPt());
+
+  unsigned NumLoops = 2;
+  SmallVector<Value *, 2> StoreValues;
+  Type *LCTy = Type::getInt64Ty(Ctx);
+  StoreValues.emplace_back(ConstantInt::get(LCTy, 1));
+  StoreValues.emplace_back(ConstantInt::get(LCTy, 2));
+
+  // Test for "#omp ordered depend(source)"
+  Builder.restoreIP(OMPBuilder.createOrderedDepend(Builder, AllocaIP, NumLoops,
+                                                   StoreValues, ".cnt.addr",
+                                                   /*IsDependSource=*/true));
+
+  Builder.CreateRetVoid();
+  OMPBuilder.finalize();
+  EXPECT_FALSE(verifyModule(*M, &errs()));
+
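+  // createOrderedDepend hoists the depend vector to the AllocaIP, so the
+  // [NumLoops x i64] alloca must be the first instruction of the entry block.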
+  AllocaInst *AllocInst = dyn_cast<AllocaInst>(&BB->front());
+  ASSERT_NE(AllocInst, nullptr);
+  ArrayType *ArrType = dyn_cast<ArrayType>(AllocInst->getAllocatedType());
+  EXPECT_EQ(ArrType->getNumElements(), NumLoops);
+  EXPECT_TRUE(
+      AllocInst->getAllocatedType()->getArrayElementType()->isIntegerTy(64));
+
+  Instruction *IterInst = dyn_cast<Instruction>(AllocInst);
+  for (unsigned Iter = 0; Iter < NumLoops; Iter++) {
+    GetElementPtrInst *DependAddrGEPIter =
+        dyn_cast<GetElementPtrInst>(IterInst->getNextNode());
+    ASSERT_NE(DependAddrGEPIter, nullptr);
+    EXPECT_EQ(DependAddrGEPIter->getPointerOperand(), AllocInst);
+    EXPECT_EQ(DependAddrGEPIter->getNumIndices(), (unsigned)2);
+    auto *FirstIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(1));
+    auto *SecondIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(2));
+    ASSERT_NE(FirstIdx, nullptr);
+    ASSERT_NE(SecondIdx, nullptr);
+    EXPECT_EQ(FirstIdx->getValue(), 0);
+    EXPECT_EQ(SecondIdx->getValue(), Iter);
+    StoreInst *StoreValue =
+        dyn_cast<StoreInst>(DependAddrGEPIter->getNextNode());
+    ASSERT_NE(StoreValue, nullptr);
+    EXPECT_EQ(StoreValue->getValueOperand(), StoreValues[Iter]);
+    EXPECT_EQ(StoreValue->getPointerOperand(), DependAddrGEPIter);
+    IterInst = dyn_cast<Instruction>(StoreValue);
+  }
+
+  GetElementPtrInst *DependBaseAddrGEP =
+      dyn_cast<GetElementPtrInst>(IterInst->getNextNode());
+  ASSERT_NE(DependBaseAddrGEP, nullptr);
+  EXPECT_EQ(DependBaseAddrGEP->getPointerOperand(), AllocInst);
+  EXPECT_EQ(DependBaseAddrGEP->getNumIndices(), (unsigned)2);
+  auto *FirstIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(1));
+  auto *SecondIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(2));
+  ASSERT_NE(FirstIdx, nullptr);
+  ASSERT_NE(SecondIdx, nullptr);
+  EXPECT_EQ(FirstIdx->getValue(), 0);
+  EXPECT_EQ(SecondIdx->getValue(), 0);
+
+  CallInst *GTID = dyn_cast<CallInst>(DependBaseAddrGEP->getNextNode());
+  ASSERT_NE(GTID, nullptr);
+  EXPECT_EQ(GTID->getNumArgOperands(), 1U);
+  EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
+  EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
+  EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
+
+  CallInst *Depend = dyn_cast<CallInst>(GTID->getNextNode());
+  ASSERT_NE(Depend, nullptr);
+  EXPECT_EQ(Depend->getNumArgOperands(), 3U);
+  EXPECT_EQ(Depend->getCalledFunction()->getName(), "__kmpc_doacross_post");
+  EXPECT_TRUE(isa<GlobalVariable>(Depend->getArgOperand(0)));
+  EXPECT_EQ(Depend->getArgOperand(1), GTID);
+  EXPECT_EQ(Depend->getArgOperand(2), DependBaseAddrGEP);
+}
+
+TEST_F(OpenMPIRBuilderTest, OrderedDirectiveDependSink) {
+  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+  OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.initialize();
+  F->setName("func");
+  IRBuilder<> Builder(BB);
+  LLVMContext &Ctx = M->getContext();
+
+  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
+
+  InsertPointTy AllocaIP(&F->getEntryBlock(),
+                         F->getEntryBlock().getFirstInsertionPt());
+
+  unsigned NumLoops = 2;
+  SmallVector<Value *, 2> StoreValues;
+  Type *LCTy = Type::getInt64Ty(Ctx);
+  StoreValues.emplace_back(ConstantInt::get(LCTy, 1));
+  StoreValues.emplace_back(ConstantInt::get(LCTy, 2));
+
+  // Test for "#omp ordered depend(sink: vec)"
+  Builder.restoreIP(OMPBuilder.createOrderedDepend(Builder, AllocaIP, NumLoops,
+                                                   StoreValues, ".cnt.addr",
+                                                   /*IsDependSource=*/false));
+
+  Builder.CreateRetVoid();
+  OMPBuilder.finalize();
+  EXPECT_FALSE(verifyModule(*M, &errs()));
+
+  AllocaInst *AllocInst = dyn_cast<AllocaInst>(&BB->front());
+  ASSERT_NE(AllocInst, nullptr);
+  ArrayType *ArrType = dyn_cast<ArrayType>(AllocInst->getAllocatedType());
+  EXPECT_EQ(ArrType->getNumElements(), NumLoops);
+  EXPECT_TRUE(
+      AllocInst->getAllocatedType()->getArrayElementType()->isIntegerTy(64));
+
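+  // The vector layout is identical to the depend(source) case; only the
+  // final runtime call differs (__kmpc_doacross_wait instead of _post).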
+  Instruction *IterInst = dyn_cast<Instruction>(AllocInst);
+  for (unsigned Iter = 0; Iter < NumLoops; Iter++) {
+    GetElementPtrInst *DependAddrGEPIter =
+        dyn_cast<GetElementPtrInst>(IterInst->getNextNode());
+    ASSERT_NE(DependAddrGEPIter, nullptr);
+    EXPECT_EQ(DependAddrGEPIter->getPointerOperand(), AllocInst);
+    EXPECT_EQ(DependAddrGEPIter->getNumIndices(), (unsigned)2);
+    auto *FirstIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(1));
+    auto *SecondIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(2));
+    ASSERT_NE(FirstIdx, nullptr);
+    ASSERT_NE(SecondIdx, nullptr);
+    EXPECT_EQ(FirstIdx->getValue(), 0);
+    EXPECT_EQ(SecondIdx->getValue(), Iter);
+    StoreInst *StoreValue =
+        dyn_cast<StoreInst>(DependAddrGEPIter->getNextNode());
+    ASSERT_NE(StoreValue, nullptr);
+    EXPECT_EQ(StoreValue->getValueOperand(), StoreValues[Iter]);
+    EXPECT_EQ(StoreValue->getPointerOperand(), DependAddrGEPIter);
+    IterInst = dyn_cast<Instruction>(StoreValue);
+  }
+
+  GetElementPtrInst *DependBaseAddrGEP =
+      dyn_cast<GetElementPtrInst>(IterInst->getNextNode());
+  ASSERT_NE(DependBaseAddrGEP, nullptr);
+  EXPECT_EQ(DependBaseAddrGEP->getPointerOperand(), AllocInst);
+  EXPECT_EQ(DependBaseAddrGEP->getNumIndices(), (unsigned)2);
+  auto *FirstIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(1));
+  auto *SecondIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(2));
+  ASSERT_NE(FirstIdx, nullptr);
+  ASSERT_NE(SecondIdx, nullptr);
+  EXPECT_EQ(FirstIdx->getValue(), 0);
+  EXPECT_EQ(SecondIdx->getValue(), 0);
+
+  CallInst *GTID = dyn_cast<CallInst>(DependBaseAddrGEP->getNextNode());
+  ASSERT_NE(GTID, nullptr);
+  EXPECT_EQ(GTID->getNumArgOperands(), 1U);
+  EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
+  EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
+  EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
+
+  CallInst *Depend = dyn_cast<CallInst>(GTID->getNextNode());
+  ASSERT_NE(Depend, nullptr);
+  EXPECT_EQ(Depend->getNumArgOperands(), 3U);
+  EXPECT_EQ(Depend->getCalledFunction()->getName(), "__kmpc_doacross_wait");
+  EXPECT_TRUE(isa<GlobalVariable>(Depend->getArgOperand(0)));
+  EXPECT_EQ(Depend->getArgOperand(1), GTID);
+  EXPECT_EQ(Depend->getArgOperand(2), DependBaseAddrGEP);
+}
+
+TEST_F(OpenMPIRBuilderTest, OrderedDirectiveThreads) {
+  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+  OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.initialize();
+  F->setName("func");
+  IRBuilder<> Builder(BB);
+
+  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
+
+  AllocaInst *PrivAI =
+      Builder.CreateAlloca(F->arg_begin()->getType(), nullptr, "priv.inst");
+
+  BasicBlock *EntryBB = nullptr;
+
+  auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+                       BasicBlock &FiniBB) {
+    EntryBB = FiniBB.getUniquePredecessor();
+
+    llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
+    llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
+    EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
+    EXPECT_EQ(EntryBB, CodeGenIPBB);
+
+    Builder.restoreIP(CodeGenIP);
+    Builder.CreateStore(F->arg_begin(), PrivAI);
+    Value *PrivLoad =
+        Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
+    Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
+  };
+
+  auto FiniCB = [&](InsertPointTy IP) {
+    BasicBlock *IPBB = IP.getBlock();
+    EXPECT_NE(IPBB->end(), IP.getPoint());
+  };
+
+  // Test for "#omp ordered [threads]"
+  Builder.restoreIP(
+      OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, true));
+
+  Builder.CreateRetVoid();
+  OMPBuilder.finalize();
+  EXPECT_FALSE(verifyModule(*M, &errs()));
+
+  EXPECT_NE(EntryBB->getTerminator(), nullptr);
+
+  CallInst *OrderedEntryCI = nullptr;
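+  // Scan the entry block for the bracketing runtime calls: with the threads
+  // clause the region must open with __kmpc_ordered and close with
+  // __kmpc_end_ordered on the same thread id.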
+  for (auto &EI : *EntryBB) {
+    Instruction *Cur = &EI;
+    if (isa<CallInst>(Cur)) {
+      OrderedEntryCI = cast<CallInst>(Cur);
+      if (OrderedEntryCI->getCalledFunction()->getName() == "__kmpc_ordered")
+        break;
+      OrderedEntryCI = nullptr;
+    }
+  }
+  EXPECT_NE(OrderedEntryCI, nullptr);
+  EXPECT_EQ(OrderedEntryCI->getNumArgOperands(), 2U);
+  EXPECT_EQ(OrderedEntryCI->getCalledFunction()->getName(), "__kmpc_ordered");
+  EXPECT_TRUE(isa<GlobalVariable>(OrderedEntryCI->getArgOperand(0)));
+
+  CallInst *OrderedEndCI = nullptr;
+  for (auto &FI : *EntryBB) {
+    Instruction *Cur = &FI;
+    if (isa<CallInst>(Cur)) {
+      OrderedEndCI = cast<CallInst>(Cur);
+      if (OrderedEndCI->getCalledFunction()->getName() == "__kmpc_end_ordered")
+        break;
+      OrderedEndCI = nullptr;
+    }
+  }
+  EXPECT_NE(OrderedEndCI, nullptr);
+  EXPECT_EQ(OrderedEndCI->getNumArgOperands(), 2U);
+  EXPECT_TRUE(isa<GlobalVariable>(OrderedEndCI->getArgOperand(0)));
+  EXPECT_EQ(OrderedEndCI->getArgOperand(1), OrderedEntryCI->getArgOperand(1));
+}
+
+TEST_F(OpenMPIRBuilderTest, OrderedDirectiveSimd) {
+  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+  OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.initialize();
+  F->setName("func");
+  IRBuilder<> Builder(BB);
+
+  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
+
+  AllocaInst *PrivAI =
+      Builder.CreateAlloca(F->arg_begin()->getType(), nullptr, "priv.inst");
+
+  BasicBlock *EntryBB = nullptr;
+
+  auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+                       BasicBlock &FiniBB) {
+    EntryBB = FiniBB.getUniquePredecessor();
+
+    llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
+    llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
+    EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
+    EXPECT_EQ(EntryBB, CodeGenIPBB);
+
+    Builder.restoreIP(CodeGenIP);
+    Builder.CreateStore(F->arg_begin(), PrivAI);
+    Value *PrivLoad =
+        Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
+    Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
+  };
+
+  auto FiniCB = [&](InsertPointTy IP) {
+    BasicBlock *IPBB = IP.getBlock();
+    EXPECT_NE(IPBB->end(), IP.getPoint());
+  };
+
+  // Test for "#omp ordered simd"
+  Builder.restoreIP(
+      OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, false));
+
+  Builder.CreateRetVoid();
+  OMPBuilder.finalize();
+  EXPECT_FALSE(verifyModule(*M, &errs()));
+
+  EXPECT_NE(EntryBB->getTerminator(), nullptr);
+
+  CallInst *OrderedEntryCI = nullptr;
+  for (auto &EI : *EntryBB) {
+    Instruction *Cur = &EI;
+    if (isa<CallInst>(Cur)) {
+      OrderedEntryCI = cast<CallInst>(Cur);
+      if (OrderedEntryCI->getCalledFunction()->getName() == "__kmpc_ordered")
+        break;
+      OrderedEntryCI = nullptr;
+    }
+  }
+  EXPECT_EQ(OrderedEntryCI, nullptr);
+
+  CallInst *OrderedEndCI = nullptr;
+  for (auto &FI : *EntryBB) {
+    Instruction *Cur = &FI;
+    if (isa<CallInst>(Cur)) {
+      OrderedEndCI = cast<CallInst>(Cur);
+      if (OrderedEndCI->getCalledFunction()->getName() == "__kmpc_end_ordered")
+        break;
+      OrderedEndCI = nullptr;
+    }
+  }
+  EXPECT_EQ(OrderedEndCI, nullptr);
+}
+
 TEST_F(OpenMPIRBuilderTest, CopyinBlocks) {
   OpenMPIRBuilder OMPBuilder(*M);
   OMPBuilder.initialize();
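
Usage note: a minimal sketch of how a frontend is expected to drive the two new entry points, mirroring the Clang codegen and unit tests above. It is illustrative only: the module/IRBuilder/AllocaIP setup and the emitOrderedBody helper are assumed to exist in the caller and are not part of this patch.

#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"

using namespace llvm;
using InsertPointTy = OpenMPIRBuilder::InsertPointTy;

// Assumed helper (hypothetical): emits the statements of the region body.
void emitOrderedBody(IRBuilder<> &Builder);

// '#pragma omp ordered depend(source)' (or depend(sink : i) with
// IsSource = false) over a loop nest of depth one; IV is the loop index,
// already converted to i64 by the caller.
void emitOrderedDepend(OpenMPIRBuilder &OMPBuilder, IRBuilder<> &Builder,
                       InsertPointTy AllocaIP, Value *IV, bool IsSource) {
  SmallVector<Value *, 1> StoreValues = {IV};
  Builder.restoreIP(OMPBuilder.createOrderedDepend(
      Builder, AllocaIP, /*NumLoops=*/1, StoreValues, ".cnt.addr", IsSource));
}

// '#pragma omp ordered [threads | simd]': with IsThreads = true the region
// is bracketed by __kmpc_ordered/__kmpc_end_ordered; with false it is
// inlined without any runtime calls.
void emitOrderedRegion(OpenMPIRBuilder &OMPBuilder, IRBuilder<> &Builder,
                       bool IsThreads) {
  auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                       BasicBlock &FiniBB) {
    Builder.restoreIP(CodeGenIP);
    emitOrderedBody(Builder);
  };
  auto FiniCB = [](InsertPointTy IP) { /* nothing extra to finalize */ };
  Builder.restoreIP(OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB,
                                                        FiniCB, IsThreads));
}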