Index: clang/lib/CodeGen/CGStmtOpenMP.cpp =================================================================== --- clang/lib/CodeGen/CGStmtOpenMP.cpp +++ clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -5314,6 +5314,33 @@ } void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { + if (CGM.getLangOpts().OpenMPIRBuilder) { + llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); + using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; + + // TODO: The ordered directive with depend or simd clause. + const CapturedStmt *CS = S.getInnermostCapturedStmt(); + const Stmt *OrderedRegionBodyStmt = CS->getCapturedStmt(); + + auto FiniCB = [this](InsertPointTy IP) { + OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); + }; + + auto BodyGenCB = [OrderedRegionBodyStmt, this](InsertPointTy AllocaIP, + InsertPointTy CodeGenIP, + llvm::BasicBlock &FiniBB) { + OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB); + OMPBuilderCBHelpers::EmitOMPRegionBody(*this, OrderedRegionBodyStmt, + CodeGenIP, FiniBB); + }; + + LexicalScope Scope(*this, S.getSourceRange()); + EmitStopPoint(&S); + Builder.restoreIP(OMPBuilder.createOrdered(Builder, BodyGenCB, FiniCB)); + + return; + } + if (S.hasClausesOfKind()) { assert(!S.hasAssociatedStmt() && "No associated statement must be in ordered depend construct."); Index: clang/lib/Sema/SemaOpenMP.cpp =================================================================== --- clang/lib/Sema/SemaOpenMP.cpp +++ clang/lib/Sema/SemaOpenMP.cpp @@ -5320,8 +5320,9 @@ if (Rel == BO_LE || Rel == BO_GE) { // Add one to the range if the relational operator is inclusive. - Range = - AssertSuccess(Actions.BuildUnaryOp(nullptr, {}, UO_PreInc, Range)); + Range = AssertSuccess(Actions.BuildBinOp( + nullptr, {}, BO_Add, Range, + Actions.ActOnIntegerConstant(SourceLocation(), 1).get())); } // Divide by the absolute step amount. Index: clang/test/OpenMP/ordered_codegen.cpp =================================================================== --- clang/test/OpenMP/ordered_codegen.cpp +++ clang/test/OpenMP/ordered_codegen.cpp @@ -1,11 +1,19 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefix=CHECK1 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=CHECK1,CHECK1-NORMAL // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK2 +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK2,CHECK2-NORMAL -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -fopenmp-version=45 -o - | FileCheck %s --check-prefix=CHECK3 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=CHECK1,CHECK1-IRBUILDER +// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK2,CHECK2-IRBUILDER + +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -fopenmp-version=45 -o - | FileCheck %s --check-prefixes=CHECK3,CHECK3-NORMAL // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -fopenmp-version=45 -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK4 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK4,CHECK4-NORMAL + +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -fopenmp-version=45 -o - | FileCheck %s --check-prefixes=CHECK3,CHECK3-IRBUILDER +// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -fopenmp-version=45 -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -fopenmp-version=45 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK4,CHECK4-IRBUILDER // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefix=CHECK5 // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s @@ -128,7 +136,7 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK1-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK1-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK1-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 @@ -137,9 +145,12 @@ // CHECK1-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1) +// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +// CHECK1-IRBUILDER-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1) +// CHECK1-NORMAL-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: +// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -157,6 +168,7 @@ // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP5]], 7 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK1-NEXT: store i32 [[SUB]], i32* [[I]], align 4 +// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[I]], align 4 @@ -188,6 +200,7 @@ // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) // CHECK1-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: @@ -195,7 +208,9 @@ // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]]) +// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP0]]) +// CHECK1-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]]) // CHECK1-NEXT: ret void // // @@ -213,7 +228,7 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK1-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK1-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 @@ -222,9 +237,11 @@ // CHECK1-NEXT: store i64 16908287, i64* [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) // CHECK1-NEXT: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1073741891, i64 0, i64 16908287, i64 1, i64 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: +// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]]) // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -243,6 +260,7 @@ // CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP5]], 127 // CHECK1-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]] // CHECK1-NEXT: store i64 [[ADD1]], i64* [[I]], align 8 +// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[I]], align 8 @@ -270,6 +288,7 @@ // CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: [[ADD7:%.*]] = add i64 [[TMP17]], 1 // CHECK1-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]]) // CHECK1-NEXT: call void @__kmpc_dispatch_fini_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: @@ -277,7 +296,9 @@ // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]]) +// CHECK1-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK1-NEXT: ret void // // @@ -303,7 +324,7 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I8:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[X9:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK1-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK1-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 @@ -336,9 +357,11 @@ // CHECK1-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) // CHECK1-NEXT: call void @__kmpc_dispatch_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1073741894, i64 0, i64 [[TMP6]], i64 1, i64 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: +// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]]) // CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -369,6 +392,7 @@ // CHECK1-NEXT: [[SUB20:%.*]] = sub nsw i64 11, [[MUL19]] // CHECK1-NEXT: [[CONV21:%.*]] = trunc i64 [[SUB20]] to i32 // CHECK1-NEXT: store i32 [[CONV21]], i32* [[X9]], align 4 +// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: [[TMP15:%.*]] = load float*, float** [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP16:%.*]] = load i8, i8* [[I8]], align 1 @@ -400,6 +424,7 @@ // CHECK1-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: [[ADD30:%.*]] = add nsw i64 [[TMP26]], 1 // CHECK1-NEXT: store i64 [[ADD30]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]]) // CHECK1-NEXT: call void @__kmpc_dispatch_fini_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: @@ -409,7 +434,9 @@ // CHECK1: omp.dispatch.end: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]]) +// CHECK1-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK1-NEXT: ret void // // @@ -430,7 +457,7 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[X2:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK1-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK1-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 @@ -440,9 +467,11 @@ // CHECK1-NEXT: store i32 199, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) // CHECK1-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1073741893, i32 0, i32 199, i32 1, i32 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: +// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -470,6 +499,7 @@ // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 -10, [[MUL5]] // CHECK1-NEXT: store i32 [[ADD6]], i32* [[X2]], align 4 +// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: [[TMP8:%.*]] = load float*, float** [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP9:%.*]] = load i8, i8* [[I]], align 1 @@ -501,6 +531,7 @@ // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) // CHECK1-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: @@ -508,7 +539,9 @@ // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]]) +// CHECK1-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK1-NEXT: ret void // // @@ -535,7 +568,7 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I28:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i32 [[LOW]], i32* [[LOW_ADDR]], align 4 // CHECK1-NEXT: store i32 [[UP]], i32* [[UP_ADDR]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[LOW_ADDR]], align 4 @@ -575,7 +608,15 @@ // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store float 0.000000e+00, float* [[ARRAYIDX]], align 4, !llvm.access.group !3 -// CHECK1-NEXT: call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !3 +// CHECK1-NORMAL-NEXT: call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !3 +// TODO: To be fixed after IRBuilder of ordered directive with simd clause is implemented with the option -fopenmp-enable-irbuilder enabled. +// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-IRBUILDER-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +// CHECK1-IRBUILDER-NEXT: [[TMP1_IRBUILER:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK1-IRBUILDER-NEXT: [[IDXPROM_IRBUILER:%.*]] = sext i32 [[TMP1_IRBUILER]] to i64 +// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX_IRBUILER:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM_IRBUILER]] +// CHECK1-IRBUILDER-NEXT: store float 1.000000e+00, float* [[ARRAYIDX_IRBUILER]], align 4 +// CHECK1-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -622,9 +663,11 @@ // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_20]], align 4 +// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12:[0-9]+]]) // CHECK1-NEXT: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 [[TMP25]], i32 1, i32 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: +// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]]) // CHECK1-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -632,13 +675,15 @@ // CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV16]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND29:%.*]] -// CHECK1: omp.inner.for.cond29: +// CHECK1-IRBUILDER: omp.inner.for.cond33: +// CHECK1-NORMAL: omp.inner.for.cond29: // CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 // CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7 // CHECK1-NEXT: [[ADD30:%.*]] = add i32 [[TMP29]], 1 // CHECK1-NEXT: [[CMP31:%.*]] = icmp ult i32 [[TMP28]], [[ADD30]] // CHECK1-NEXT: br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END40:%.*]] -// CHECK1: omp.inner.for.body32: +// CHECK1-IRBUILDER: omp.inner.for.body36: +// CHECK1-NORMAL: omp.inner.for.body32: // CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group !7 // CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 // CHECK1-NEXT: [[MUL33:%.*]] = mul i32 [[TMP31]], 1 @@ -648,17 +693,29 @@ // CHECK1-NEXT: [[IDXPROM35:%.*]] = sext i32 [[TMP32]] to i64 // CHECK1-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM35]] // CHECK1-NEXT: store float 0.000000e+00, float* [[ARRAYIDX36]], align 4, !llvm.access.group !7 -// CHECK1-NEXT: call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7 +// CHECK1-NORMAL-NEXT: call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7 +// TODO: To be fixed after IRBuilder of ordered directive with simd clause is implemented with the option -fopenmp-enable-irbuilder enabled. +// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-IRBUILDER-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +// CHECK1-IRBUILDER-NEXT: [[TMP1_IRBUILER:%.*]] = load i32, i32* [[I28]], align 4 +// CHECK1-IRBUILDER-NEXT: [[IDXPROM_IRBUILER:%.*]] = sext i32 [[TMP1_IRBUILER]] to i64 +// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX_IRBUILER:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM_IRBUILER]] +// CHECK1-IRBUILDER-NEXT: store float 1.000000e+00, float* [[ARRAYIDX_IRBUILER]], align 4 +// CHECK1-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE37:%.*]] -// CHECK1: omp.body.continue37: +// CHECK1-IRBUILDER: omp.body.continue44: +// CHECK1-NORMAL: omp.body.continue37: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC38:%.*]] -// CHECK1: omp.inner.for.inc38: +// CHECK1-IRBUILDER: omp.inner.for.inc45: +// CHECK1-NORMAL: omp.inner.for.inc38: // CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 // CHECK1-NEXT: [[ADD39:%.*]] = add i32 [[TMP33]], 1 // CHECK1-NEXT: store i32 [[ADD39]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]]) // CHECK1-NEXT: call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]), !llvm.access.group !7 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP8:![0-9]+]] -// CHECK1: omp.inner.for.end40: +// CHECK1-IRBUILDER: omp.inner.for.end48: +// CHECK1-NORMAL: omp.inner.for.end40: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] @@ -681,34 +738,36 @@ // CHECK1: .omp.final.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]]) +// CHECK1-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@__captured_stmt -// CHECK1-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3:[0-9]+]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@__captured_stmt.1 -// CHECK1-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: ret void +// CHECK1-NORMAL-LABEL: define {{[^@]+}}@__captured_stmt +// CHECK1-NORMAL-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-NORMAL-NEXT: entry: +// CHECK1-NORMAL-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NORMAL-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 +// CHECK1-NORMAL-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 +// CHECK1-NORMAL-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK1-NORMAL-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64 +// CHECK1-NORMAL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]] +// CHECK1-NORMAL-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// CHECK1-NORMAL-NEXT: ret void +// +// +// CHECK1-NORMAL-LABEL: define {{[^@]+}}@__captured_stmt.1 +// CHECK1-NORMAL-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3]] { +// CHECK1-NORMAL-NEXT: entry: +// CHECK1-NORMAL-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NORMAL-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 +// CHECK1-NORMAL-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 +// CHECK1-NORMAL-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK1-NORMAL-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64 +// CHECK1-NORMAL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]] +// CHECK1-NORMAL-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// CHECK1-NORMAL-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@_Z18static_not_chunkedPfS_S_S_ @@ -725,7 +784,7 @@ // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK2-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK2-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK2-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK2-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 @@ -734,9 +793,12 @@ // CHECK2-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1) +// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +// CHECK2-IRBUILDER-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1) +// CHECK2-NORMAL-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: +// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) // CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -754,6 +816,7 @@ // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP5]], 7 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK2-NEXT: store i32 [[SUB]], i32* [[I]], align 4 +// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8 // CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[I]], align 4 @@ -785,6 +848,7 @@ // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) // CHECK2-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: @@ -792,7 +856,9 @@ // CHECK2: omp.dispatch.inc: // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: -// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]]) +// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP0]]) +// CHECK2-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]]) // CHECK2-NEXT: ret void // // @@ -810,7 +876,7 @@ // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK2-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK2-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 @@ -819,9 +885,11 @@ // CHECK2-NEXT: store i64 16908287, i64* [[DOTOMP_UB]], align 8 // CHECK2-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) // CHECK2-NEXT: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1073741891, i64 0, i64 16908287, i64 1, i64 1) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: +// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]]) // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) // CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -840,6 +908,7 @@ // CHECK2-NEXT: [[MUL:%.*]] = mul i64 [[TMP5]], 127 // CHECK2-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]] // CHECK2-NEXT: store i64 [[ADD1]], i64* [[I]], align 8 +// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8 // CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[I]], align 8 @@ -867,6 +936,7 @@ // CHECK2-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 // CHECK2-NEXT: [[ADD7:%.*]] = add i64 [[TMP17]], 1 // CHECK2-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8 +// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]]) // CHECK2-NEXT: call void @__kmpc_dispatch_fini_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: @@ -874,7 +944,9 @@ // CHECK2: omp.dispatch.inc: // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: -// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]]) +// CHECK2-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK2-NEXT: ret void // // @@ -900,7 +972,7 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I8:%.*]] = alloca i8, align 1 // CHECK2-NEXT: [[X9:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK2-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK2-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 @@ -933,9 +1005,11 @@ // CHECK2-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) // CHECK2-NEXT: call void @__kmpc_dispatch_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1073741894, i64 0, i64 [[TMP6]], i64 1, i64 1) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: +// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]]) // CHECK2-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) // CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -966,6 +1040,7 @@ // CHECK2-NEXT: [[SUB20:%.*]] = sub nsw i64 11, [[MUL19]] // CHECK2-NEXT: [[CONV21:%.*]] = trunc i64 [[SUB20]] to i32 // CHECK2-NEXT: store i32 [[CONV21]], i32* [[X9]], align 4 +// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: [[TMP15:%.*]] = load float*, float** [[B_ADDR]], align 8 // CHECK2-NEXT: [[TMP16:%.*]] = load i8, i8* [[I8]], align 1 @@ -997,6 +1072,7 @@ // CHECK2-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 // CHECK2-NEXT: [[ADD30:%.*]] = add nsw i64 [[TMP26]], 1 // CHECK2-NEXT: store i64 [[ADD30]], i64* [[DOTOMP_IV]], align 8 +// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]]) // CHECK2-NEXT: call void @__kmpc_dispatch_fini_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: @@ -1006,7 +1082,9 @@ // CHECK2: omp.dispatch.end: // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: -// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]]) +// CHECK2-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK2-NEXT: ret void // // @@ -1027,7 +1105,7 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i8, align 1 // CHECK2-NEXT: [[X2:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK2-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK2-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 @@ -1037,9 +1115,11 @@ // CHECK2-NEXT: store i32 199, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) // CHECK2-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1073741893, i32 0, i32 199, i32 1, i32 1) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: +// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) // CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -1067,6 +1147,7 @@ // CHECK2-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 -10, [[MUL5]] // CHECK2-NEXT: store i32 [[ADD6]], i32* [[X2]], align 4 +// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: [[TMP8:%.*]] = load float*, float** [[B_ADDR]], align 8 // CHECK2-NEXT: [[TMP9:%.*]] = load i8, i8* [[I]], align 1 @@ -1098,6 +1179,7 @@ // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK2-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) // CHECK2-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: @@ -1105,7 +1187,9 @@ // CHECK2: omp.dispatch.inc: // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: -// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]]) +// CHECK2-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK2-NEXT: ret void // // @@ -1132,7 +1216,7 @@ // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I28:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i32 [[LOW]], i32* [[LOW_ADDR]], align 4 // CHECK2-NEXT: store i32 [[UP]], i32* [[UP_ADDR]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[LOW_ADDR]], align 4 @@ -1172,7 +1256,15 @@ // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]] // CHECK2-NEXT: store float 0.000000e+00, float* [[ARRAYIDX]], align 4, !llvm.access.group !3 -// CHECK2-NEXT: call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !3 +// CHECK2-NORMAL-NEXT: call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !3 +// TODO: To be fixed after IRBuilder of ordered directive with simd clause is implemented with the option -fopenmp-enable-irbuilder enabled. +// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-IRBUILDER-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +// CHECK2-IRBUILDER-NEXT: [[TMP1_IRBUILER:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK2-IRBUILDER-NEXT: [[IDXPROM_IRBUILER:%.*]] = sext i32 [[TMP1_IRBUILER]] to i64 +// CHECK2-IRBUILDER-NEXT: [[ARRAYIDX_IRBUILER:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM_IRBUILER]] +// CHECK2-IRBUILDER-NEXT: store float 1.000000e+00, float* [[ARRAYIDX_IRBUILER]], align 4 +// CHECK2-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1219,9 +1311,11 @@ // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_20]], align 4 +// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12:[0-9]+]]) // CHECK2-NEXT: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 [[TMP25]], i32 1, i32 1) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: +// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]]) // CHECK2-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) // CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -1229,13 +1323,15 @@ // CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV16]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND29:%.*]] -// CHECK2: omp.inner.for.cond29: +// CHECK2-IRBUILDER: omp.inner.for.cond33: +// CHECK2-NORMAL: omp.inner.for.cond29: // CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 // CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7 // CHECK2-NEXT: [[ADD30:%.*]] = add i32 [[TMP29]], 1 // CHECK2-NEXT: [[CMP31:%.*]] = icmp ult i32 [[TMP28]], [[ADD30]] // CHECK2-NEXT: br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END40:%.*]] -// CHECK2: omp.inner.for.body32: +// CHECK2-IRBUILDER: omp.inner.for.body36: +// CHECK2-NORMAL: omp.inner.for.body32: // CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group !7 // CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 // CHECK2-NEXT: [[MUL33:%.*]] = mul i32 [[TMP31]], 1 @@ -1245,17 +1341,29 @@ // CHECK2-NEXT: [[IDXPROM35:%.*]] = sext i32 [[TMP32]] to i64 // CHECK2-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM35]] // CHECK2-NEXT: store float 0.000000e+00, float* [[ARRAYIDX36]], align 4, !llvm.access.group !7 -// CHECK2-NEXT: call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7 +// CHECK2-NORMAL-NEXT: call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7 +// TODO: To be fixed after IRBuilder of ordered directive with simd clause is implemented with the option -fopenmp-enable-irbuilder enabled. +// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK2-IRBUILDER-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +// CHECK2-IRBUILDER-NEXT: [[TMP1_IRBUILER:%.*]] = load i32, i32* [[I28]], align 4 +// CHECK2-IRBUILDER-NEXT: [[IDXPROM_IRBUILER:%.*]] = sext i32 [[TMP1_IRBUILER]] to i64 +// CHECK2-IRBUILDER-NEXT: [[ARRAYIDX_IRBUILER:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM_IRBUILER]] +// CHECK2-IRBUILDER-NEXT: store float 1.000000e+00, float* [[ARRAYIDX_IRBUILER]], align 4 +// CHECK2-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE37:%.*]] -// CHECK2: omp.body.continue37: +// CHECK2-IRBUILDER: omp.body.continue44: +// CHECK2-NORMAL: omp.body.continue37: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC38:%.*]] -// CHECK2: omp.inner.for.inc38: +// CHECK2-IRBUILDER: omp.inner.for.inc45: +// CHECK2-NORMAL: omp.inner.for.inc38: // CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 // CHECK2-NEXT: [[ADD39:%.*]] = add i32 [[TMP33]], 1 // CHECK2-NEXT: store i32 [[ADD39]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]]) // CHECK2-NEXT: call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]), !llvm.access.group !7 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP8:![0-9]+]] -// CHECK2: omp.inner.for.end40: +// CHECK2-IRBUILDER: omp.inner.for.end48: +// CHECK2-NORMAL: omp.inner.for.end40: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] @@ -1278,34 +1386,36 @@ // CHECK2: .omp.final.done: // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: -// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@__captured_stmt -// CHECK2-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3:[0-9]+]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]]) +// CHECK2-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK2-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@__captured_stmt.1 -// CHECK2-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: ret void +// CHECK2-NORMAL-LABEL: define {{[^@]+}}@__captured_stmt +// CHECK2-NORMAL-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK2-NORMAL-NEXT: entry: +// CHECK2-NORMAL-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 +// CHECK2-NORMAL-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 +// CHECK2-NORMAL-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 +// CHECK2-NORMAL-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK2-NORMAL-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64 +// CHECK2-NORMAL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]] +// CHECK2-NORMAL-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// CHECK2-NORMAL-NEXT: ret void +// +// +// CHECK2-NORMAL-LABEL: define {{[^@]+}}@__captured_stmt.1 +// CHECK2-NORMAL-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3]] { +// CHECK2-NORMAL-NEXT: entry: +// CHECK2-NORMAL-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 +// CHECK2-NORMAL-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 +// CHECK2-NORMAL-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 +// CHECK2-NORMAL-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK2-NORMAL-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64 +// CHECK2-NORMAL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]] +// CHECK2-NORMAL-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// CHECK2-NORMAL-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_Z18static_not_chunkedPfS_S_S_ @@ -1322,7 +1432,7 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK3-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK3-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK3-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK3-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 @@ -1331,9 +1441,12 @@ // CHECK3-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1) +// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +// CHECK3-IRBUILDER-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1) +// CHECK3-NORMAL-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: +// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) // CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -1351,6 +1464,7 @@ // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP5]], 7 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK3-NEXT: store i32 [[SUB]], i32* [[I]], align 4 +// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8 // CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[I]], align 4 @@ -1382,6 +1496,7 @@ // CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) // CHECK3-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: @@ -1389,7 +1504,9 @@ // CHECK3: omp.dispatch.inc: // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]]) +// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP0]]) +// CHECK3-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]]) // CHECK3-NEXT: ret void // // @@ -1407,7 +1524,7 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK3-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK3-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 @@ -1416,9 +1533,11 @@ // CHECK3-NEXT: store i64 16908287, i64* [[DOTOMP_UB]], align 8 // CHECK3-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) // CHECK3-NEXT: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 67, i64 0, i64 16908287, i64 1, i64 1) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: +// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]]) // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) // CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -1437,6 +1556,7 @@ // CHECK3-NEXT: [[MUL:%.*]] = mul i64 [[TMP5]], 127 // CHECK3-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]] // CHECK3-NEXT: store i64 [[ADD1]], i64* [[I]], align 8 +// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8 // CHECK3-NEXT: [[TMP7:%.*]] = load i64, i64* [[I]], align 8 @@ -1464,6 +1584,7 @@ // CHECK3-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 // CHECK3-NEXT: [[ADD7:%.*]] = add i64 [[TMP17]], 1 // CHECK3-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8 +// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]]) // CHECK3-NEXT: call void @__kmpc_dispatch_fini_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: @@ -1471,7 +1592,9 @@ // CHECK3: omp.dispatch.inc: // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]]) +// CHECK3-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK3-NEXT: ret void // // @@ -1497,7 +1620,7 @@ // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I8:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[X9:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK3-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK3-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 @@ -1530,9 +1653,11 @@ // CHECK3-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) // CHECK3-NEXT: call void @__kmpc_dispatch_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 70, i64 0, i64 [[TMP6]], i64 1, i64 1) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: +// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]]) // CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) // CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -1563,6 +1688,7 @@ // CHECK3-NEXT: [[SUB20:%.*]] = sub nsw i64 11, [[MUL19]] // CHECK3-NEXT: [[CONV21:%.*]] = trunc i64 [[SUB20]] to i32 // CHECK3-NEXT: store i32 [[CONV21]], i32* [[X9]], align 4 +// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: [[TMP15:%.*]] = load float*, float** [[B_ADDR]], align 8 // CHECK3-NEXT: [[TMP16:%.*]] = load i8, i8* [[I8]], align 1 @@ -1594,6 +1720,7 @@ // CHECK3-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 // CHECK3-NEXT: [[ADD30:%.*]] = add nsw i64 [[TMP26]], 1 // CHECK3-NEXT: store i64 [[ADD30]], i64* [[DOTOMP_IV]], align 8 +// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]]) // CHECK3-NEXT: call void @__kmpc_dispatch_fini_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: @@ -1603,7 +1730,9 @@ // CHECK3: omp.dispatch.end: // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]]) +// CHECK3-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK3-NEXT: ret void // // @@ -1624,7 +1753,7 @@ // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[X2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK3-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK3-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 @@ -1634,9 +1763,11 @@ // CHECK3-NEXT: store i32 199, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) // CHECK3-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 69, i32 0, i32 199, i32 1, i32 1) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: +// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) // CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -1664,6 +1795,7 @@ // CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 -10, [[MUL5]] // CHECK3-NEXT: store i32 [[ADD6]], i32* [[X2]], align 4 +// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: [[TMP8:%.*]] = load float*, float** [[B_ADDR]], align 8 // CHECK3-NEXT: [[TMP9:%.*]] = load i8, i8* [[I]], align 1 @@ -1695,6 +1827,7 @@ // CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK3-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) // CHECK3-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: @@ -1702,7 +1835,9 @@ // CHECK3: omp.dispatch.inc: // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]]) +// CHECK3-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK3-NEXT: ret void // // @@ -1729,7 +1864,7 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I28:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store i32 [[LOW]], i32* [[LOW_ADDR]], align 4 // CHECK3-NEXT: store i32 [[UP]], i32* [[UP_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[LOW_ADDR]], align 4 @@ -1769,7 +1904,15 @@ // CHECK3-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]] // CHECK3-NEXT: store float 0.000000e+00, float* [[ARRAYIDX]], align 4, !llvm.access.group !3 -// CHECK3-NEXT: call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !3 +// CHECK3-NORMAL-NEXT: call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !3 +// TODO: To be fixed after IRBuilder of ordered directive with simd clause is implemented with the option -fopenmp-enable-irbuilder enabled. +// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-IRBUILDER-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +// CHECK3-IRBUILDER-NEXT: [[TMP1_IRBUILER:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK3-IRBUILDER-NEXT: [[IDXPROM_IRBUILER:%.*]] = sext i32 [[TMP1_IRBUILER]] to i64 +// CHECK3-IRBUILDER-NEXT: [[ARRAYIDX_IRBUILER:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM_IRBUILER]] +// CHECK3-IRBUILDER-NEXT: store float 1.000000e+00, float* [[ARRAYIDX_IRBUILER]], align 4 +// CHECK3-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1816,9 +1959,11 @@ // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_20]], align 4 +// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12:[0-9]+]]) // CHECK3-NEXT: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 [[TMP25]], i32 1, i32 1) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: +// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]]) // CHECK3-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) // CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -1826,13 +1971,15 @@ // CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV16]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND29:%.*]] -// CHECK3: omp.inner.for.cond29: +// CHECK3-IRBUILDER: omp.inner.for.cond33: +// CHECK3-NORMAL: omp.inner.for.cond29: // CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 // CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7 // CHECK3-NEXT: [[ADD30:%.*]] = add i32 [[TMP29]], 1 // CHECK3-NEXT: [[CMP31:%.*]] = icmp ult i32 [[TMP28]], [[ADD30]] // CHECK3-NEXT: br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END40:%.*]] -// CHECK3: omp.inner.for.body32: +// CHECK3-IRBUILDER: omp.inner.for.body36: +// CHECK3-NORMAL: omp.inner.for.body32: // CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group !7 // CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 // CHECK3-NEXT: [[MUL33:%.*]] = mul i32 [[TMP31]], 1 @@ -1842,17 +1989,29 @@ // CHECK3-NEXT: [[IDXPROM35:%.*]] = sext i32 [[TMP32]] to i64 // CHECK3-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM35]] // CHECK3-NEXT: store float 0.000000e+00, float* [[ARRAYIDX36]], align 4, !llvm.access.group !7 -// CHECK3-NEXT: call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7 +// CHECK3-NORMAL-NEXT: call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7 +// TODO: To be fixed after IRBuilder of ordered directive with simd clause is implemented with the option -fopenmp-enable-irbuilder enabled. +// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-IRBUILDER-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +// CHECK3-IRBUILDER-NEXT: [[TMP1_IRBUILER:%.*]] = load i32, i32* [[I28]], align 4 +// CHECK3-IRBUILDER-NEXT: [[IDXPROM_IRBUILER:%.*]] = sext i32 [[TMP1_IRBUILER]] to i64 +// CHECK3-IRBUILDER-NEXT: [[ARRAYIDX_IRBUILER:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM_IRBUILER]] +// CHECK3-IRBUILDER-NEXT: store float 1.000000e+00, float* [[ARRAYIDX_IRBUILER]], align 4 +// CHECK3-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE37:%.*]] -// CHECK3: omp.body.continue37: +// CHECK3-IRBUILDER: omp.body.continue44: +// CHECK3-NORMAL: omp.body.continue37: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC38:%.*]] -// CHECK3: omp.inner.for.inc38: +// CHECK3-IRBUILDER: omp.inner.for.inc45: +// CHECK3-NORMAL: omp.inner.for.inc38: // CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 // CHECK3-NEXT: [[ADD39:%.*]] = add i32 [[TMP33]], 1 // CHECK3-NEXT: store i32 [[ADD39]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]]) // CHECK3-NEXT: call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]), !llvm.access.group !7 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP8:![0-9]+]] -// CHECK3: omp.inner.for.end40: +// CHECK3-IRBUILDER: omp.inner.for.end48: +// CHECK3-NORMAL: omp.inner.for.end40: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] @@ -1875,34 +2034,36 @@ // CHECK3: .omp.final.done: // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]]) +// CHECK3-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK3-NEXT: ret void // // -// CHECK3-LABEL: define {{[^@]+}}@__captured_stmt -// CHECK3-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3:[0-9]+]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]] -// CHECK3-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@__captured_stmt.1 -// CHECK3-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]] -// CHECK3-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: ret void +// CHECK3-NORMAL-LABEL: define {{[^@]+}}@__captured_stmt +// CHECK3-NORMAL-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK3-NORMAL-NEXT: entry: +// CHECK3-NORMAL-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NORMAL-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 +// CHECK3-NORMAL-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 +// CHECK3-NORMAL-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK3-NORMAL-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64 +// CHECK3-NORMAL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]] +// CHECK3-NORMAL-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// CHECK3-NORMAL-NEXT: ret void +// +// +// CHECK3-NORMAL-LABEL: define {{[^@]+}}@__captured_stmt.1 +// CHECK3-NORMAL-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3]] { +// CHECK3-NORMAL-NEXT: entry: +// CHECK3-NORMAL-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NORMAL-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 +// CHECK3-NORMAL-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 +// CHECK3-NORMAL-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK3-NORMAL-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64 +// CHECK3-NORMAL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]] +// CHECK3-NORMAL-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// CHECK3-NORMAL-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@_Z18static_not_chunkedPfS_S_S_ @@ -1919,7 +2080,7 @@ // CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK4-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK4-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK4-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK4-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 @@ -1928,9 +2089,12 @@ // CHECK4-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1) +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +// CHECK4-IRBUILDER-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1) +// CHECK4-NORMAL-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1) // CHECK4-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK4: omp.dispatch.cond: +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) // CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) // CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -1948,6 +2112,7 @@ // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP5]], 7 // CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK4-NEXT: store i32 [[SUB]], i32* [[I]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8 // CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[I]], align 4 @@ -1979,6 +2144,7 @@ // CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) // CHECK4-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: @@ -1986,7 +2152,9 @@ // CHECK4: omp.dispatch.inc: // CHECK4-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK4: omp.dispatch.end: -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]]) +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP0]]) +// CHECK4-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]]) // CHECK4-NEXT: ret void // // @@ -2004,7 +2172,7 @@ // CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK4-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK4-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 @@ -2013,9 +2181,11 @@ // CHECK4-NEXT: store i64 16908287, i64* [[DOTOMP_UB]], align 8 // CHECK4-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) // CHECK4-NEXT: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 67, i64 0, i64 16908287, i64 1, i64 1) // CHECK4-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK4: omp.dispatch.cond: +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]]) // CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) // CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -2034,6 +2204,7 @@ // CHECK4-NEXT: [[MUL:%.*]] = mul i64 [[TMP5]], 127 // CHECK4-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]] // CHECK4-NEXT: store i64 [[ADD1]], i64* [[I]], align 8 +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8 // CHECK4-NEXT: [[TMP7:%.*]] = load i64, i64* [[I]], align 8 @@ -2061,6 +2232,7 @@ // CHECK4-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 // CHECK4-NEXT: [[ADD7:%.*]] = add i64 [[TMP17]], 1 // CHECK4-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8 +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]]) // CHECK4-NEXT: call void @__kmpc_dispatch_fini_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: @@ -2068,7 +2240,9 @@ // CHECK4: omp.dispatch.inc: // CHECK4-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK4: omp.dispatch.end: -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]]) +// CHECK4-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK4-NEXT: ret void // // @@ -2094,7 +2268,7 @@ // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I8:%.*]] = alloca i8, align 1 // CHECK4-NEXT: [[X9:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK4-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK4-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 @@ -2127,9 +2301,11 @@ // CHECK4-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK4-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) // CHECK4-NEXT: call void @__kmpc_dispatch_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 70, i64 0, i64 [[TMP6]], i64 1, i64 1) // CHECK4-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK4: omp.dispatch.cond: +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]]) // CHECK4-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) // CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -2160,6 +2336,7 @@ // CHECK4-NEXT: [[SUB20:%.*]] = sub nsw i64 11, [[MUL19]] // CHECK4-NEXT: [[CONV21:%.*]] = trunc i64 [[SUB20]] to i32 // CHECK4-NEXT: store i32 [[CONV21]], i32* [[X9]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: [[TMP15:%.*]] = load float*, float** [[B_ADDR]], align 8 // CHECK4-NEXT: [[TMP16:%.*]] = load i8, i8* [[I8]], align 1 @@ -2191,6 +2368,7 @@ // CHECK4-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 // CHECK4-NEXT: [[ADD30:%.*]] = add nsw i64 [[TMP26]], 1 // CHECK4-NEXT: store i64 [[ADD30]], i64* [[DOTOMP_IV]], align 8 +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]]) // CHECK4-NEXT: call void @__kmpc_dispatch_fini_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: @@ -2200,7 +2378,9 @@ // CHECK4: omp.dispatch.end: // CHECK4-NEXT: br label [[OMP_PRECOND_END]] // CHECK4: omp.precond.end: -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]]) +// CHECK4-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK4-NEXT: ret void // // @@ -2221,7 +2401,7 @@ // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i8, align 1 // CHECK4-NEXT: [[X2:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK4-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK4-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 @@ -2231,9 +2411,11 @@ // CHECK4-NEXT: store i32 199, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) // CHECK4-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 69, i32 0, i32 199, i32 1, i32 1) // CHECK4-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK4: omp.dispatch.cond: +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) // CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) // CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -2261,6 +2443,7 @@ // CHECK4-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 -10, [[MUL5]] // CHECK4-NEXT: store i32 [[ADD6]], i32* [[X2]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: [[TMP8:%.*]] = load float*, float** [[B_ADDR]], align 8 // CHECK4-NEXT: [[TMP9:%.*]] = load i8, i8* [[I]], align 1 @@ -2292,6 +2475,7 @@ // CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK4-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) // CHECK4-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: @@ -2299,7 +2483,9 @@ // CHECK4: omp.dispatch.inc: // CHECK4-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK4: omp.dispatch.end: -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]]) +// CHECK4-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK4-NEXT: ret void // // @@ -2326,7 +2512,7 @@ // CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I28:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: store i32 [[LOW]], i32* [[LOW_ADDR]], align 4 // CHECK4-NEXT: store i32 [[UP]], i32* [[UP_ADDR]], align 4 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[LOW_ADDR]], align 4 @@ -2366,7 +2552,15 @@ // CHECK4-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]] // CHECK4-NEXT: store float 0.000000e+00, float* [[ARRAYIDX]], align 4, !llvm.access.group !3 -// CHECK4-NEXT: call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !3 +// CHECK4-NORMAL-NEXT: call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !3 +// TODO: To be fixed after IRBuilder of ordered directive with simd clause is implemented with the option -fopenmp-enable-irbuilder enabled. +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-IRBUILDER-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +// CHECK4-IRBUILDER-NEXT: [[TMP1_IRBUILER:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK4-IRBUILDER-NEXT: [[IDXPROM_IRBUILER:%.*]] = sext i32 [[TMP1_IRBUILER]] to i64 +// CHECK4-IRBUILDER-NEXT: [[ARRAYIDX_IRBUILER:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM_IRBUILER]] +// CHECK4-IRBUILDER-NEXT: store float 1.000000e+00, float* [[ARRAYIDX_IRBUILER]], align 4 +// CHECK4-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2413,9 +2607,11 @@ // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_20]], align 4 +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12:[0-9]+]]) // CHECK4-NEXT: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 [[TMP25]], i32 1, i32 1) // CHECK4-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK4: omp.dispatch.cond: +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]]) // CHECK4-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) // CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] @@ -2423,13 +2619,15 @@ // CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV16]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND29:%.*]] -// CHECK4: omp.inner.for.cond29: +// CHECK4-IRBUILDER: omp.inner.for.cond33: +// CHECK4-NORMAL: omp.inner.for.cond29: // CHECK4-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 // CHECK4-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7 // CHECK4-NEXT: [[ADD30:%.*]] = add i32 [[TMP29]], 1 // CHECK4-NEXT: [[CMP31:%.*]] = icmp ult i32 [[TMP28]], [[ADD30]] // CHECK4-NEXT: br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END40:%.*]] -// CHECK4: omp.inner.for.body32: +// CHECK4-IRBUILDER: omp.inner.for.body36: +// CHECK4-NORMAL: omp.inner.for.body32: // CHECK4-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group !7 // CHECK4-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 // CHECK4-NEXT: [[MUL33:%.*]] = mul i32 [[TMP31]], 1 @@ -2439,17 +2637,29 @@ // CHECK4-NEXT: [[IDXPROM35:%.*]] = sext i32 [[TMP32]] to i64 // CHECK4-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM35]] // CHECK4-NEXT: store float 0.000000e+00, float* [[ARRAYIDX36]], align 4, !llvm.access.group !7 -// CHECK4-NEXT: call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7 +// CHECK4-NORMAL-NEXT: call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7 +// TODO: To be fixed after IRBuilder of ordered directive with simd clause is implemented with the option -fopenmp-enable-irbuilder enabled. +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK4-IRBUILDER-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +// CHECK4-IRBUILDER-NEXT: [[TMP1_IRBUILER:%.*]] = load i32, i32* [[I28]], align 4 +// CHECK4-IRBUILDER-NEXT: [[IDXPROM_IRBUILER:%.*]] = sext i32 [[TMP1_IRBUILER]] to i64 +// CHECK4-IRBUILDER-NEXT: [[ARRAYIDX_IRBUILER:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM_IRBUILER]] +// CHECK4-IRBUILDER-NEXT: store float 1.000000e+00, float* [[ARRAYIDX_IRBUILER]], align 4 +// CHECK4-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE37:%.*]] -// CHECK4: omp.body.continue37: +// CHECK4-IRBUILDER: omp.body.continue44: +// CHECK4-NORMAL: omp.body.continue37: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC38:%.*]] -// CHECK4: omp.inner.for.inc38: +// CHECK4-IRBUILDER: omp.inner.for.inc45: +// CHECK4-NORMAL: omp.inner.for.inc38: // CHECK4-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 // CHECK4-NEXT: [[ADD39:%.*]] = add i32 [[TMP33]], 1 // CHECK4-NEXT: store i32 [[ADD39]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]]) // CHECK4-NEXT: call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]), !llvm.access.group !7 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP8:![0-9]+]] -// CHECK4: omp.inner.for.end40: +// CHECK4-IRBUILDER: omp.inner.for.end48: +// CHECK4-NORMAL: omp.inner.for.end40: // CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK4: omp.dispatch.inc: // CHECK4-NEXT: br label [[OMP_DISPATCH_COND]] @@ -2472,34 +2682,36 @@ // CHECK4: .omp.final.done: // CHECK4-NEXT: br label [[OMP_PRECOND_END]] // CHECK4: omp.precond.end: -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]]) +// CHECK4-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK4-NEXT: ret void // // -// CHECK4-LABEL: define {{[^@]+}}@__captured_stmt -// CHECK4-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3:[0-9]+]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK4-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64 -// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]] -// CHECK4-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 -// CHECK4-NEXT: ret void -// -// -// CHECK4-LABEL: define {{[^@]+}}@__captured_stmt.1 -// CHECK4-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK4-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64 -// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]] -// CHECK4-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 -// CHECK4-NEXT: ret void +// CHECK4-NORMAL-LABEL: define {{[^@]+}}@__captured_stmt +// CHECK4-NORMAL-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK4-NORMAL-NEXT: entry: +// CHECK4-NORMAL-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 +// CHECK4-NORMAL-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 +// CHECK4-NORMAL-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 +// CHECK4-NORMAL-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK4-NORMAL-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64 +// CHECK4-NORMAL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]] +// CHECK4-NORMAL-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// CHECK4-NORMAL-NEXT: ret void +// +// +// CHECK4-NORMAL-LABEL: define {{[^@]+}}@__captured_stmt.1 +// CHECK4-NORMAL-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3]] { +// CHECK4-NORMAL-NEXT: entry: +// CHECK4-NORMAL-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 +// CHECK4-NORMAL-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 +// CHECK4-NORMAL-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 +// CHECK4-NORMAL-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK4-NORMAL-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64 +// CHECK4-NORMAL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]] +// CHECK4-NORMAL-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// CHECK4-NORMAL-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@_Z18static_not_chunkedPfS_S_S_ Index: llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h =================================================================== --- llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -724,6 +724,17 @@ FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst); + /// Generator for '#omp ordered' + /// + /// \param Loc The insert and source location description. + /// \param BodyGenCB Callback that will generate the region code. + /// \param FiniCB Callback to finalize variable copies. + /// + /// \returns The insertion position *after* the ordered. + InsertPointTy createOrdered(const LocationDescription &Loc, + BodyGenCallbackTy BodyGenCB, + FinalizeCallbackTy FiniCB); + /// Generator for '#omp sections' /// /// \param Loc The insert and source location description. Index: llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp =================================================================== --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -1960,6 +1960,30 @@ /*Conditional*/ false, /*hasFinalize*/ true); } +OpenMPIRBuilder::InsertPointTy +OpenMPIRBuilder::createOrdered(const LocationDescription &Loc, + BodyGenCallbackTy BodyGenCB, + FinalizeCallbackTy FiniCB) { + if (!updateToLocation(Loc)) + return Loc.IP; + + Directive OMPD = Directive::OMPD_ordered; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + Value *Ident = getOrCreateIdent(SrcLocStr); + Value *ThreadId = getOrCreateThreadID(Ident); + Value *Args[] = {Ident, ThreadId}; + + Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_ordered); + Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args); + + Function *ExitRTLFn = + getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_ordered); + Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args); + + return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB, + /*Conditional*/ false, /*hasFinalize*/ true); +} + OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion( Directive OMPD, Instruction *EntryCall, Instruction *ExitCall, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional, Index: llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp =================================================================== --- llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -2113,6 +2113,77 @@ EXPECT_EQ(CriticalEndCI->getArgOperand(2)->getType(), CriticalNamePtrTy); } +TEST_F(OpenMPIRBuilderTest, OrderedDirective) { + using InsertPointTy = OpenMPIRBuilder::InsertPointTy; + OpenMPIRBuilder OMPBuilder(*M); + OMPBuilder.initialize(); + F->setName("func"); + IRBuilder<> Builder(BB); + + OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); + + AllocaInst *PrivAI = + Builder.CreateAlloca(F->arg_begin()->getType(), nullptr, "priv.inst"); + + BasicBlock *EntryBB = nullptr; + + auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + BasicBlock &FiniBB) { + EntryBB = FiniBB.getUniquePredecessor(); + + llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); + llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint(); + EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst); + EXPECT_EQ(EntryBB, CodeGenIPBB); + + Builder.restoreIP(CodeGenIP); + Builder.CreateStore(F->arg_begin(), PrivAI); + Value *PrivLoad = + Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); + Builder.CreateICmpNE(F->arg_begin(), PrivLoad); + }; + + auto FiniCB = [&](InsertPointTy IP) { + BasicBlock *IPBB = IP.getBlock(); + EXPECT_NE(IPBB->end(), IP.getPoint()); + }; + + Builder.restoreIP(OMPBuilder.createOrdered(Builder, BodyGenCB, FiniCB)); + + Value *EntryBBTI = EntryBB->getTerminator(); + EXPECT_EQ(EntryBBTI, nullptr); + + CallInst *OrderedEntryCI = nullptr; + for (auto &EI : *EntryBB) { + Instruction *Cur = &EI; + if (isa(Cur)) { + OrderedEntryCI = cast(Cur); + if (OrderedEntryCI->getCalledFunction()->getName() == "__kmpc_ordered") + break; + OrderedEntryCI = nullptr; + } + } + EXPECT_NE(OrderedEntryCI, nullptr); + EXPECT_EQ(OrderedEntryCI->getNumArgOperands(), 2U); + EXPECT_EQ(OrderedEntryCI->getCalledFunction()->getName(), "__kmpc_ordered"); + EXPECT_TRUE(isa(OrderedEntryCI->getArgOperand(0))); + + CallInst *OrderedEndCI = nullptr; + for (auto &FI : *EntryBB) { + Instruction *Cur = &FI; + if (isa(Cur)) { + OrderedEndCI = cast(Cur); + if (OrderedEndCI->getCalledFunction()->getName() == "__kmpc_end_ordered") + break; + OrderedEndCI = nullptr; + } + } + EXPECT_NE(OrderedEndCI, nullptr); + EXPECT_EQ(OrderedEndCI->getNumArgOperands(), 2U); + EXPECT_TRUE(isa(OrderedEndCI->getArgOperand(0))); + EXPECT_EQ(OrderedEndCI->getArgOperand(1), OrderedEntryCI->getArgOperand(1)); +} + TEST_F(OpenMPIRBuilderTest, CopyinBlocks) { OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.initialize();