diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -4608,7 +4608,8 @@ } } if (ThisCaptureRegion == OMPD_parallel) { - // Capture temp arrays for inscan reductions. + // Capture temp arrays for inscan reductions and locals in aligned + // clauses. for (OMPClause *C : Clauses) { if (auto *RC = dyn_cast(C)) { if (RC->getModifier() != OMPC_REDUCTION_inscan) @@ -4616,6 +4617,10 @@ for (Expr *E : RC->copy_array_temps()) MarkDeclarationsReferencedInExpr(E); } + if (auto *AC = dyn_cast(C)) { + for (Expr *E : AC->varlists()) + MarkDeclarationsReferencedInExpr(E); + } } } if (++CompletedRegions == CaptureRegions.size()) diff --git a/clang/test/OpenMP/parallel_for_simd_aligned_codegen.cpp b/clang/test/OpenMP/parallel_for_simd_aligned_codegen.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/parallel_for_simd_aligned_codegen.cpp @@ -0,0 +1,346 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1 +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK2 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK3 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK4 + +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +void foo(float *c) { +#pragma omp parallel for simd aligned(c) + for (int i = 0; i < 10; ++i); +} + +#endif + +// CHECK1-LABEL: define {{[^@]+}}@_Z3fooPf +// CHECK1-SAME: (float* [[C:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**)* @.omp_outlined. to void (i32*, i32*, ...)*), float** [[C_ADDR]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], float** nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load float**, float*** [[C_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load float*, float** [[TMP0]], align 8 +// CHECK1-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[TMP1]], i64 16) ] +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1: .omp.final.then: +// CHECK1-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK1: .omp.final.done: +// CHECK1-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@_Z3fooPf +// CHECK2-SAME: (float* [[C:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 +// CHECK2-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**)* @.omp_outlined. to void (i32*, i32*, ...)*), float** [[C_ADDR]]) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined. +// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], float** nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK2-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load float**, float*** [[C_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load float*, float** [[TMP0]], align 8 +// CHECK2-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[TMP1]], i64 16) ] +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2: cond.true: +// CHECK2-NEXT: br label [[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END]] +// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK2: omp.body.continue: +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK2-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2: .omp.final.then: +// CHECK2-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK2: .omp.final.done: +// CHECK2-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@_Z3fooPf +// CHECK3-SAME: (float* [[C:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 +// CHECK3-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**)* @.omp_outlined. to void (i32*, i32*, ...)*), float** [[C_ADDR]]) +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. +// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], float** nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load float**, float*** [[C_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = load float*, float** [[TMP0]], align 8 +// CHECK3-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[TMP1]], i64 16) ] +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3: cond.true: +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK3: omp.body.continue: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3: .omp.final.then: +// CHECK3-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK3: .omp.final.done: +// CHECK3-NEXT: ret void +// +// +// CHECK4-LABEL: define {{[^@]+}}@_Z3fooPf +// CHECK4-SAME: (float* [[C:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 +// CHECK4-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**)* @.omp_outlined. to void (i32*, i32*, ...)*), float** [[C_ADDR]]) +// CHECK4-NEXT: ret void +// +// +// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. +// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], float** nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 +// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK4-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load float**, float*** [[C_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = load float*, float** [[TMP0]], align 8 +// CHECK4-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[TMP1]], i64 16) ] +// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK4: cond.true: +// CHECK4-NEXT: br label [[COND_END:%.*]] +// CHECK4: cond.false: +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: br label [[COND_END]] +// CHECK4: cond.end: +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK4: omp.inner.for.cond: +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4: omp.inner.for.body: +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK4: omp.body.continue: +// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK4: omp.inner.for.inc: +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]] +// CHECK4: omp.inner.for.end: +// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK4: omp.loop.exit: +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK4-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK4: .omp.final.then: +// CHECK4-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK4-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK4: .omp.final.done: +// CHECK4-NEXT: ret void +//