diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -91,11 +91,17 @@ if (getASTAllocaAddressSpace() != LangAS::Default) { auto DestAddrSpace = getContext().getTargetAddressSpace(LangAS::Default); llvm::IRBuilderBase::InsertPointGuard IPG(Builder); - // When ArraySize is nullptr, alloca is inserted at AllocaInsertPt, - // otherwise alloca is inserted at the current insertion point of the - // builder. - if (!ArraySize) - Builder.SetInsertPoint(AllocaInsertPt); + // Do not break the contiguity of static allocas by inserting addressspace + // casts in between static allocas, otherwise, inliner's attempt to move + // static allocas from callee to caller will fail, and which in turn will + // have serious side effects on code transformation/optimzation. + if (!ArraySize) { + auto *EBB = AllocaInsertPt->getParent(); + auto Iter = AllocaInsertPt->getIterator(); + if (Iter != EBB->end()) + ++Iter; + Builder.SetInsertPoint(EBB, Iter); + } V = getTargetHooks().performAddrSpaceCast( *this, V, getASTAllocaAddressSpace(), LangAS::Default, Ty->getPointerTo(DestAddrSpace), /*non-null*/ true); @@ -134,10 +140,11 @@ isa( cast(Alloca)->getPointerOperand()))); - auto *Store = new llvm::StoreInst(Init, Alloca, /*volatile*/ false, - Var.getAlignment().getAsAlign()); - llvm::BasicBlock *Block = AllocaInsertPt->getParent(); - Block->getInstList().insertAfter(AllocaInsertPt->getIterator(), Store); + // No need to depend on AllocaInsertPt to init static alloca, just init it + // where the builder is at the moment, otherwise there is a possibility of + // getting invalid ir (use before define) for addressspace casted static + // allocas. + Builder.CreateStore(Init, Var); } Address CodeGenFunction::CreateIRTemp(QualType Ty, const Twine &Name) { diff --git a/clang/test/CodeGenCUDA/builtins-amdgcn.cu b/clang/test/CodeGenCUDA/builtins-amdgcn.cu --- a/clang/test/CodeGenCUDA/builtins-amdgcn.cu +++ b/clang/test/CodeGenCUDA/builtins-amdgcn.cu @@ -1,3 +1,4 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx906 -x hip \ // RUN: -aux-triple x86_64-unknown-linux-gnu -fcuda-is-device -emit-llvm %s \ // RUN: -o - | FileCheck %s @@ -9,72 +10,139 @@ #include "Inputs/cuda.h" // CHECK-LABEL: @_Z16use_dispatch_ptrPi( -// CHECK: %[[PTR:.*]] = call align 4 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() -// CHECK: %{{.*}} = addrspacecast i8 addrspace(4)* %[[PTR]] to i32* +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OUT:%.*]] = alloca i32*, align 8, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca i32*, align 8, addrspace(5) +// CHECK-NEXT: [[DISPATCH_PTR:%.*]] = alloca i32*, align 8, addrspace(5) +// CHECK-NEXT: [[DISPATCH_PTR_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[DISPATCH_PTR]] to i32** +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[OUT_ADDR]] to i32** +// CHECK-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[OUT]] to i32** +// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast i32 addrspace(1)* [[OUT_COERCE:%.*]] to i32* +// CHECK-NEXT: store i32* [[TMP0]], i32** [[OUT_ASCAST]], align 8 +// CHECK-NEXT: [[OUT1:%.*]] = load i32*, i32** [[OUT_ASCAST]], align 8 +// CHECK-NEXT: store i32* [[OUT1]], i32** [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = call align 4 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() +// CHECK-NEXT: [[TMP2:%.*]] = addrspacecast i8 addrspace(4)* [[TMP1]] to i32* +// CHECK-NEXT: store i32* [[TMP2]], i32** [[DISPATCH_PTR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DISPATCH_PTR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load i32*, i32** [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[TMP4]], i32* [[TMP5]], align 4 +// CHECK-NEXT: ret void +// __global__ void use_dispatch_ptr(int* out) { const int* dispatch_ptr = (const int*)__builtin_amdgcn_dispatch_ptr(); *out = *dispatch_ptr; } -// CHECK-LABEL: @_Z12test_ds_fmaxf( -// CHECK: call contract float @llvm.amdgcn.ds.fmax.f32(float addrspace(3)* @_ZZ12test_ds_fmaxfE6shared, float %{{[^,]*}}, i32 0, i32 0, i1 false) __global__ +// CHECK-LABEL: @_Z12test_ds_fmaxf( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[X:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[X]] to float* +// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[SRC_ADDR]] to float* +// CHECK-NEXT: store float [[SRC:%.*]], float* [[SRC_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[SRC_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = call contract float @llvm.amdgcn.ds.fmax.f32(float addrspace(3)* @_ZZ12test_ds_fmaxfE6shared, float [[TMP0]], i32 0, i32 0, i1 false) +// CHECK-NEXT: store volatile float [[TMP1]], float* [[X_ASCAST]], align 4 +// CHECK-NEXT: ret void +// void test_ds_fmax(float src) { __shared__ float shared; volatile float x = __builtin_amdgcn_ds_fmaxf(&shared, src, 0, 0, false); } // CHECK-LABEL: @_Z12test_ds_faddf( -// CHECK: call contract float @llvm.amdgcn.ds.fadd.f32(float addrspace(3)* @_ZZ12test_ds_faddfE6shared, float %{{[^,]*}}, i32 0, i32 0, i1 false) +// CHECK-NEXT: entry: +// CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[X:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[X]] to float* +// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[SRC_ADDR]] to float* +// CHECK-NEXT: store float [[SRC:%.*]], float* [[SRC_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[SRC_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = call contract float @llvm.amdgcn.ds.fadd.f32(float addrspace(3)* @_ZZ12test_ds_faddfE6shared, float [[TMP0]], i32 0, i32 0, i1 false) +// CHECK-NEXT: store volatile float [[TMP1]], float* [[X_ASCAST]], align 4 +// CHECK-NEXT: ret void +// __global__ void test_ds_fadd(float src) { __shared__ float shared; volatile float x = __builtin_amdgcn_ds_faddf(&shared, src, 0, 0, false); } -// CHECK-LABEL: @_Z12test_ds_fminfPf(float %src, float addrspace(1)* %shared.coerce -// CHECK: %shared = alloca float*, align 8, addrspace(5) -// CHECK: %shared.ascast = addrspacecast float* addrspace(5)* %shared to float** -// CHECK: %shared.addr = alloca float*, align 8, addrspace(5) -// CHECK: %shared.addr.ascast = addrspacecast float* addrspace(5)* %shared.addr to float** -// CHECK: %[[S0:.*]] = addrspacecast float addrspace(1)* %shared.coerce to float* -// CHECK: store float* %[[S0]], float** %shared.ascast, align 8 -// CHECK: %shared1 = load float*, float** %shared.ascast, align 8 -// CHECK: store float* %shared1, float** %shared.addr.ascast, align 8 -// CHECK: %[[S1:.*]] = load float*, float** %shared.addr.ascast, align 8 -// CHECK: %[[S2:.*]] = addrspacecast float* %[[S1]] to float addrspace(3)* -// CHECK: call contract float @llvm.amdgcn.ds.fmin.f32(float addrspace(3)* %[[S2]] +// CHECK-LABEL: @_Z12test_ds_fminfPf( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[SHARED:%.*]] = alloca float*, align 8, addrspace(5) +// CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[SHARED_ADDR:%.*]] = alloca float*, align 8, addrspace(5) +// CHECK-NEXT: [[X:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[X]] to float* +// CHECK-NEXT: [[SHARED_ADDR_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[SHARED_ADDR]] to float** +// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[SRC_ADDR]] to float* +// CHECK-NEXT: [[SHARED_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[SHARED]] to float** +// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast float addrspace(1)* [[SHARED_COERCE:%.*]] to float* +// CHECK-NEXT: store float* [[TMP0]], float** [[SHARED_ASCAST]], align 8 +// CHECK-NEXT: [[SHARED1:%.*]] = load float*, float** [[SHARED_ASCAST]], align 8 +// CHECK-NEXT: store float [[SRC:%.*]], float* [[SRC_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store float* [[SHARED1]], float** [[SHARED_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load float*, float** [[SHARED_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = addrspacecast float* [[TMP1]] to float addrspace(3)* +// CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[SRC_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = call contract float @llvm.amdgcn.ds.fmin.f32(float addrspace(3)* [[TMP2]], float [[TMP3]], i32 0, i32 0, i1 false) +// CHECK-NEXT: store volatile float [[TMP4]], float* [[X_ASCAST]], align 4 +// CHECK-NEXT: ret void +// __global__ void test_ds_fmin(float src, float *shared) { volatile float x = __builtin_amdgcn_ds_fminf(shared, src, 0, 0, false); } -// CHECK: @_Z33test_ret_builtin_nondef_addrspace -// CHECK: %[[X:.*]] = alloca i8*, align 8, addrspace(5) -// CHECK: %[[XC:.*]] = addrspacecast i8* addrspace(5)* %[[X]] to i8** -// CHECK: %[[Y:.*]] = call align 4 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() -// CHECK: %[[YASCAST:.*]] = addrspacecast i8 addrspace(4)* %[[Y]] to i8* -// CHECK: store i8* %[[YASCAST]], i8** %[[XC]], align 8 +// CHECK-LABEL: @_Z33test_ret_builtin_nondef_addrspacev( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[X:%.*]] = alloca i8*, align 8, addrspace(5) +// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast i8* addrspace(5)* [[X]] to i8** +// CHECK-NEXT: [[TMP0:%.*]] = call align 4 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() +// CHECK-NEXT: [[TMP1:%.*]] = addrspacecast i8 addrspace(4)* [[TMP0]] to i8* +// CHECK-NEXT: store i8* [[TMP1]], i8** [[X_ASCAST]], align 8 +// CHECK-NEXT: ret void +// __device__ void test_ret_builtin_nondef_addrspace() { void *x = __builtin_amdgcn_dispatch_ptr(); } // CHECK-LABEL: @_Z6endpgmv( -// CHECK: call void @llvm.amdgcn.endpgm() +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @llvm.amdgcn.endpgm() +// CHECK-NEXT: ret void +// __global__ void endpgm() { __builtin_amdgcn_endpgm(); } // Check the 64 bit argument is correctly passed to the intrinsic without truncation or assertion. -// CHECK-LABEL: @_Z14test_uicmp_i64 -// CHECK: store i64* %out1, i64** %out.addr.ascast -// CHECK-NEXT: store i64 %a, i64* %a.addr.ascast -// CHECK-NEXT: store i64 %b, i64* %b.addr.ascast -// CHECK-NEXT: %[[V0:.*]] = load i64, i64* %a.addr.ascast -// CHECK-NEXT: %[[V1:.*]] = load i64, i64* %b.addr.ascast -// CHECK-NEXT: %[[V2:.*]] = call i64 @llvm.amdgcn.icmp.i64.i64(i64 %[[V0]], i64 %[[V1]], i32 35) -// CHECK-NEXT: %[[V3:.*]] = load i64*, i64** %out.addr.ascast -// CHECK-NEXT: store i64 %[[V2]], i64* %[[V3]] -// CHECK-NEXT: ret void +// CHECK-LABEL: @_Z14test_uicmp_i64Pyyy( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OUT:%.*]] = alloca i64*, align 8, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca i64*, align 8, addrspace(5) +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast i64 addrspace(5)* [[B_ADDR]] to i64* +// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast i64 addrspace(5)* [[A_ADDR]] to i64* +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[OUT_ADDR]] to i64** +// CHECK-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[OUT]] to i64** +// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast i64 addrspace(1)* [[OUT_COERCE:%.*]] to i64* +// CHECK-NEXT: store i64* [[TMP0]], i64** [[OUT_ASCAST]], align 8 +// CHECK-NEXT: [[OUT1:%.*]] = load i64*, i64** [[OUT_ASCAST]], align 8 +// CHECK-NEXT: store i64* [[OUT1]], i64** [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i64 [[A:%.*]], i64* [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i64 [[B:%.*]], i64* [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i64(i64 [[TMP1]], i64 [[TMP2]], i32 35) +// CHECK-NEXT: [[TMP4:%.*]] = load i64*, i64** [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i64 [[TMP3]], i64* [[TMP4]], align 8 +// CHECK-NEXT: ret void +// __global__ void test_uicmp_i64(unsigned long long *out, unsigned long long a, unsigned long long b) { *out = __builtin_amdgcn_uicmpl(a, b, 30+5); @@ -82,11 +150,21 @@ // Check the 64 bit return value is correctly returned without truncation or assertion. -// CHECK-LABEL: @_Z14test_s_memtime -// CHECK: %[[V1:.*]] = call i64 @llvm.amdgcn.s.memtime() -// CHECK-NEXT: %[[PTR:.*]] = load i64*, i64** %out.addr.ascast -// CHECK-NEXT: store i64 %[[V1]], i64* %[[PTR]] -// CHECK-NEXT: ret void +// CHECK-LABEL: @_Z14test_s_memtimePy( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OUT:%.*]] = alloca i64*, align 8, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca i64*, align 8, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[OUT_ADDR]] to i64** +// CHECK-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[OUT]] to i64** +// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast i64 addrspace(1)* [[OUT_COERCE:%.*]] to i64* +// CHECK-NEXT: store i64* [[TMP0]], i64** [[OUT_ASCAST]], align 8 +// CHECK-NEXT: [[OUT1:%.*]] = load i64*, i64** [[OUT_ASCAST]], align 8 +// CHECK-NEXT: store i64* [[OUT1]], i64** [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.s.memtime() +// CHECK-NEXT: [[TMP2:%.*]] = load i64*, i64** [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i64 [[TMP1]], i64* [[TMP2]], align 8 +// CHECK-NEXT: ret void +// __global__ void test_s_memtime(unsigned long long* out) { *out = __builtin_amdgcn_s_memtime(); @@ -95,41 +173,55 @@ // Check a generic pointer can be passed as a shared pointer and a generic pointer. __device__ void func(float *x); -// CHECK: @_Z17test_ds_fmin_funcfPf -// CHECK: %[[SHARED:.*]] = alloca float*, align 8, addrspace(5) -// CHECK: %[[SHARED_ASCAST:.*]] = addrspacecast float* addrspace(5)* %[[SHARED]] to float** -// CHECK: %[[SRC_ADDR:.*]] = alloca float, align 4, addrspace(5) -// CHECK: %[[SRC_ADDR_ASCAST:.*]] = addrspacecast float addrspace(5)* %[[SRC_ADDR]] to float* -// CHECK: %[[SHARED_ADDR:.*]] = alloca float*, align 8, addrspace(5) -// CHECK: %[[SHARED_ADDR_ASCAST:.*]] = addrspacecast float* addrspace(5)* %[[SHARED_ADDR]] to float** -// CHECK: %[[X:.*]] = alloca float, align 4, addrspace(5) -// CHECK: %[[X_ASCAST:.*]] = addrspacecast float addrspace(5)* %[[X]] to float* -// CHECK: %[[SHARED1:.*]] = load float*, float** %[[SHARED_ASCAST]], align 8 -// CHECK: store float %src, float* %[[SRC_ADDR_ASCAST]], align 4 -// CHECK: store float* %[[SHARED1]], float** %[[SHARED_ADDR_ASCAST]], align 8 -// CHECK: %[[ARG0_PTR:.*]] = load float*, float** %[[SHARED_ADDR_ASCAST]], align 8 -// CHECK: %[[ARG0:.*]] = addrspacecast float* %[[ARG0_PTR]] to float addrspace(3)* -// CHECK: call contract float @llvm.amdgcn.ds.fmin.f32(float addrspace(3)* %[[ARG0]] -// CHECK: %[[ARG0:.*]] = load float*, float** %[[SHARED_ADDR_ASCAST]], align 8 -// CHECK: call void @_Z4funcPf(float* %[[ARG0]]) #8 +// CHECK-LABEL: @_Z17test_ds_fmin_funcfPf( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[SHARED:%.*]] = alloca float*, align 8, addrspace(5) +// CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[SHARED_ADDR:%.*]] = alloca float*, align 8, addrspace(5) +// CHECK-NEXT: [[X:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[X]] to float* +// CHECK-NEXT: [[SHARED_ADDR_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[SHARED_ADDR]] to float** +// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[SRC_ADDR]] to float* +// CHECK-NEXT: [[SHARED_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[SHARED]] to float** +// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast float addrspace(1)* [[SHARED_COERCE:%.*]] to float* +// CHECK-NEXT: store float* [[TMP0]], float** [[SHARED_ASCAST]], align 8 +// CHECK-NEXT: [[SHARED1:%.*]] = load float*, float** [[SHARED_ASCAST]], align 8 +// CHECK-NEXT: store float [[SRC:%.*]], float* [[SRC_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store float* [[SHARED1]], float** [[SHARED_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load float*, float** [[SHARED_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = addrspacecast float* [[TMP1]] to float addrspace(3)* +// CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[SRC_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = call contract float @llvm.amdgcn.ds.fmin.f32(float addrspace(3)* [[TMP2]], float [[TMP3]], i32 0, i32 0, i1 false) +// CHECK-NEXT: store volatile float [[TMP4]], float* [[X_ASCAST]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load float*, float** [[SHARED_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @_Z4funcPf(float* [[TMP5]]) #[[ATTR8:[0-9]+]] +// CHECK-NEXT: ret void +// __global__ void test_ds_fmin_func(float src, float *__restrict shared) { volatile float x = __builtin_amdgcn_ds_fminf(shared, src, 0, 0, false); func(shared); } -// CHECK: @_Z14test_is_sharedPf(float addrspace(1)* %[[X_COERCE:.*]]) -// CHECK: %[[X:.*]] = alloca float*, align 8, addrspace(5) -// CHECK: %[[X_ASCAST:.*]] = addrspacecast float* addrspace(5)* %[[X]] to float** -// CHECK: %[[X_ADDR:.*]] = alloca float*, align 8, addrspace(5) -// CHECK: %[[X_ADDR_ASCAST:.*]] = addrspacecast float* addrspace(5)* %[[X_ADDR]] to float** -// CHECK: %[[X_FP:.*]] = addrspacecast float addrspace(1)* %[[X_COERCE]] to float* -// CHECK: store float* %[[X_FP]], float** %[[X_ASCAST]], align 8 -// CHECK: %[[X1:.*]] = load float*, float** %[[X_ASCAST]], align 8 -// CHECK: store float* %[[X1]], float** %[[X_ADDR_ASCAST]], align 8 -// CHECK: %[[X_TMP:.*]] = load float*, float** %[[X_ADDR_ASCAST]], align 8 -// CHECK: %[[X_ARG:.*]] = bitcast float* %[[X_TMP]] to i8* -// CHECK: call i1 @llvm.amdgcn.is.shared(i8* %[[X_ARG]]) +// CHECK-LABEL: @_Z14test_is_sharedPf( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[X:%.*]] = alloca float*, align 8, addrspace(5) +// CHECK-NEXT: [[X_ADDR:%.*]] = alloca float*, align 8, addrspace(5) +// CHECK-NEXT: [[RET:%.*]] = alloca i8, align 1, addrspace(5) +// CHECK-NEXT: [[RET_ASCAST:%.*]] = addrspacecast i8 addrspace(5)* [[RET]] to i8* +// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[X_ADDR]] to float** +// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[X]] to float** +// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast float addrspace(1)* [[X_COERCE:%.*]] to float* +// CHECK-NEXT: store float* [[TMP0]], float** [[X_ASCAST]], align 8 +// CHECK-NEXT: [[X1:%.*]] = load float*, float** [[X_ASCAST]], align 8 +// CHECK-NEXT: store float* [[X1]], float** [[X_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load float*, float** [[X_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[TMP1]] to i8* +// CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.amdgcn.is.shared(i8* [[TMP2]]) +// CHECK-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TMP3]] to i8 +// CHECK-NEXT: store i8 [[FROMBOOL]], i8* [[RET_ASCAST]], align 1 +// CHECK-NEXT: ret void +// __global__ void test_is_shared(float *x){ bool ret = __builtin_amdgcn_is_shared(x); } diff --git a/clang/test/CodeGenCXX/amdgcn-automatic-variable.cpp b/clang/test/CodeGenCXX/amdgcn-automatic-variable.cpp --- a/clang/test/CodeGenCXX/amdgcn-automatic-variable.cpp +++ b/clang/test/CodeGenCXX/amdgcn-automatic-variable.cpp @@ -1,53 +1,60 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // RUN: %clang_cc1 -O0 -triple amdgcn---amdgiz -emit-llvm %s -o - | FileCheck %s -// CHECK-LABEL: define{{.*}} void @_Z5func1Pi(i32* %x) +// CHECK-LABEL: @_Z5func1Pi( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32*, align 8, addrspace(5) +// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[X_ADDR]] to i32** +// CHECK-NEXT: store i32* [[X:%.*]], i32** [[X_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[X_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 1, i32* [[TMP0]], align 4 +// CHECK-NEXT: ret void +// void func1(int *x) { - // CHECK: %[[x_addr:.*]] = alloca i32*{{.*}}addrspace(5) - // CHECK: %[[r0:.*]] = addrspacecast i32* addrspace(5)* %[[x_addr]] to i32** - // CHECK: store i32* %x, i32** %[[r0]] - // CHECK: %[[r1:.*]] = load i32*, i32** %[[r0]] - // CHECK: store i32 1, i32* %[[r1]] *x = 1; } -// CHECK-LABEL: define{{.*}} void @_Z5func2v() +// CHECK-LABEL: @_Z5func2v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[LV1:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[LV2:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[LA:%.*]] = alloca [100 x i32], align 4, addrspace(5) +// CHECK-NEXT: [[LP1:%.*]] = alloca i32*, align 8, addrspace(5) +// CHECK-NEXT: [[LP2:%.*]] = alloca i32*, align 8, addrspace(5) +// CHECK-NEXT: [[LVC:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[LVC_ASCAST:%.*]] = addrspacecast i32 addrspace(5)* [[LVC]] to i32* +// CHECK-NEXT: [[LP2_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[LP2]] to i32** +// CHECK-NEXT: [[LP1_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[LP1]] to i32** +// CHECK-NEXT: [[LA_ASCAST:%.*]] = addrspacecast [100 x i32] addrspace(5)* [[LA]] to [100 x i32]* +// CHECK-NEXT: [[LV2_ASCAST:%.*]] = addrspacecast i32 addrspace(5)* [[LV2]] to i32* +// CHECK-NEXT: [[LV1_ASCAST:%.*]] = addrspacecast i32 addrspace(5)* [[LV1]] to i32* +// CHECK-NEXT: store i32 1, i32* [[LV1_ASCAST]], align 4 +// CHECK-NEXT: store i32 2, i32* [[LV2_ASCAST]], align 4 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[LA_ASCAST]], i64 0, i64 0 +// CHECK-NEXT: store i32 3, i32* [[ARRAYIDX]], align 4 +// CHECK-NEXT: store i32* [[LV1_ASCAST]], i32** [[LP1_ASCAST]], align 8 +// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[LA_ASCAST]], i64 0, i64 0 +// CHECK-NEXT: store i32* [[ARRAYDECAY]], i32** [[LP2_ASCAST]], align 8 +// CHECK-NEXT: call void @_Z5func1Pi(i32* [[LV1_ASCAST]]) +// CHECK-NEXT: store i32 4, i32* [[LVC_ASCAST]], align 4 +// CHECK-NEXT: store i32 4, i32* [[LV1_ASCAST]], align 4 +// CHECK-NEXT: ret void +// void func2(void) { - // CHECK: %lv1 = alloca i32, align 4, addrspace(5) - // CHECK: %[[r0:.*]] = addrspacecast i32 addrspace(5)* %lv1 to i32* - // CHECK: %lv2 = alloca i32, align 4, addrspace(5) - // CHECK: %[[r1:.*]] = addrspacecast i32 addrspace(5)* %lv2 to i32* - // CHECK: %la = alloca [100 x i32], align 4, addrspace(5) - // CHECK: %[[r2:.*]] = addrspacecast [100 x i32] addrspace(5)* %la to [100 x i32]* - // CHECK: %lp1 = alloca i32*, align 8, addrspace(5) - // CHECK: %[[r3:.*]] = addrspacecast i32* addrspace(5)* %lp1 to i32** - // CHECK: %lp2 = alloca i32*, align 8, addrspace(5) - // CHECK: %[[r4:.*]] = addrspacecast i32* addrspace(5)* %lp2 to i32** - // CHECK: %lvc = alloca i32, align 4, addrspace(5) - // CHECK: %[[r5:.*]] = addrspacecast i32 addrspace(5)* %lvc to i32* - - // CHECK: store i32 1, i32* %[[r0]] + int lv1; lv1 = 1; - // CHECK: store i32 2, i32* %[[r1]] int lv2 = 2; - // CHECK: %[[arrayidx:.*]] = getelementptr inbounds [100 x i32], [100 x i32]* %[[r2]], i64 0, i64 0 - // CHECK: store i32 3, i32* %[[arrayidx]], align 4 int la[100]; la[0] = 3; - // CHECK: store i32* %[[r0]], i32** %[[r3]], align 8 int *lp1 = &lv1; - // CHECK: %[[arraydecay:.*]] = getelementptr inbounds [100 x i32], [100 x i32]* %[[r2]], i64 0, i64 0 - // CHECK: store i32* %[[arraydecay]], i32** %[[r4]], align 8 int *lp2 = la; - // CHECK: call void @_Z5func1Pi(i32* %[[r0]]) func1(&lv1); - // CHECK: store i32 4, i32* %[[r5]] - // CHECK: store i32 4, i32* %[[r0]] const int lvc = 4; lv1 = lvc; } @@ -63,38 +70,62 @@ } }; -// CHECK-LABEL: define{{.*}} void @_Z5func3v +// CHECK-LABEL: @_Z5func3v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = alloca [[CLASS_A:%.*]], align 4, addrspace(5) +// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast [[CLASS_A]] addrspace(5)* [[A]] to %class.A* +// CHECK-NEXT: call void @_ZN1AC1Ev(%class.A* nonnull align 4 dereferenceable(4) [[A_ASCAST]]) +// CHECK-NEXT: call void @_ZN1AD1Ev(%class.A* nonnull align 4 dereferenceable(4) [[A_ASCAST]]) +// CHECK-NEXT: ret void +// void func3() { - // CHECK: %[[a:.*]] = alloca %class.A, align 4, addrspace(5) - // CHECK: %[[r0:.*]] = addrspacecast %class.A addrspace(5)* %[[a]] to %class.A* - // CHECK: call void @_ZN1AC1Ev(%class.A* {{[^,]*}} %[[r0]]) - // CHECK: call void @_ZN1AD1Ev(%class.A* {{[^,]*}} %[[r0]]) A a; } -// CHECK-LABEL: define{{.*}} void @_Z5func4i +// CHECK-LABEL: @_Z5func4i( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast i32 addrspace(5)* [[X_ADDR]] to i32* +// CHECK-NEXT: store i32 [[X:%.*]], i32* [[X_ADDR_ASCAST]], align 4 +// CHECK-NEXT: call void @_Z5func1Pi(i32* [[X_ADDR_ASCAST]]) +// CHECK-NEXT: ret void +// void func4(int x) { - // CHECK: %[[x_addr:.*]] = alloca i32, align 4, addrspace(5) - // CHECK: %[[r0:.*]] = addrspacecast i32 addrspace(5)* %[[x_addr]] to i32* - // CHECK: store i32 %x, i32* %[[r0]], align 4 - // CHECK: call void @_Z5func1Pi(i32* %[[r0]]) func1(&x); } -// CHECK-LABEL: define{{.*}} void @_Z5func5v +// CHECK-LABEL: @_Z5func5v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[X:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast i32 addrspace(5)* [[X]] to i32* +// CHECK-NEXT: ret void +// void func5() { return; int x = 0; } -// CHECK-LABEL: define{{.*}} void @_Z5func6v +// CHECK-LABEL: @_Z5func6v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[X:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast i32 addrspace(5)* [[X]] to i32* +// CHECK-NEXT: ret void +// void func6() { return; int x; } -// CHECK-LABEL: define{{.*}} void @_Z5func7v extern void use(int *); +// CHECK-LABEL: @_Z5func7v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[X:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast i32 addrspace(5)* [[X]] to i32* +// CHECK-NEXT: br label [[LATER:%.*]] +// CHECK: later: +// CHECK-NEXT: call void @_Z3usePi(i32* [[X_ASCAST]]) +// CHECK-NEXT: ret void +// void func7() { goto later; int x; @@ -102,4 +133,3 @@ use(&x); } -// CHECK-NOT: !opencl.ocl.version diff --git a/clang/test/CodeGenCXX/amdgcn-func-arg.cpp b/clang/test/CodeGenCXX/amdgcn-func-arg.cpp --- a/clang/test/CodeGenCXX/amdgcn-func-arg.cpp +++ b/clang/test/CodeGenCXX/amdgcn-func-arg.cpp @@ -1,3 +1,4 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // RUN: %clang_cc1 -O0 -triple amdgcn -emit-llvm %s -o - | FileCheck %s class A { @@ -17,77 +18,101 @@ void func_with_ref_arg(A &a); void func_with_ref_arg(B &b); -// CHECK-LABEL: define{{.*}} void @_Z22func_with_indirect_arg1A(%class.A addrspace(5)* %a) -// CHECK: %p = alloca %class.A*, align 8, addrspace(5) -// CHECK: %[[r1:.+]] = addrspacecast %class.A* addrspace(5)* %p to %class.A** -// CHECK: %[[r0:.+]] = addrspacecast %class.A addrspace(5)* %a to %class.A* -// CHECK: store %class.A* %[[r0]], %class.A** %[[r1]], align 8 +// CHECK-LABEL: @_Z22func_with_indirect_arg1A( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[P:%.*]] = alloca %class.A*, align 8, addrspace(5) +// CHECK-NEXT: [[P_ASCAST:%.*]] = addrspacecast %class.A* addrspace(5)* [[P]] to %class.A** +// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast [[CLASS_A:%.*]] addrspace(5)* [[A:%.*]] to %class.A* +// CHECK-NEXT: store %class.A* [[A_ASCAST]], %class.A** [[P_ASCAST]], align 8 +// CHECK-NEXT: ret void +// void func_with_indirect_arg(A a) { A *p = &a; } -// CHECK-LABEL: define{{.*}} void @_Z22test_indirect_arg_autov() -// CHECK: %a = alloca %class.A, align 4, addrspace(5) -// CHECK: %[[r0:.+]] = addrspacecast %class.A addrspace(5)* %a to %class.A* -// CHECK: %agg.tmp = alloca %class.A, align 4, addrspace(5) -// CHECK: %[[r1:.+]] = addrspacecast %class.A addrspace(5)* %agg.tmp to %class.A* -// CHECK: call void @_ZN1AC1Ev(%class.A* {{[^,]*}} %[[r0]]) -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 -// CHECK: %[[r4:.+]] = addrspacecast %class.A* %[[r1]] to %class.A addrspace(5)* -// CHECK: call void @_Z22func_with_indirect_arg1A(%class.A addrspace(5)* %[[r4]]) -// CHECK: call void @_ZN1AD1Ev(%class.A* {{[^,]*}} %[[r1]]) -// CHECK: call void @_Z17func_with_ref_argR1A(%class.A* nonnull align 4 dereferenceable(4) %[[r0]]) -// CHECK: call void @_ZN1AD1Ev(%class.A* {{[^,]*}} %[[r0]]) +// CHECK-LABEL: @_Z22test_indirect_arg_autov( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = alloca [[CLASS_A:%.*]], align 4, addrspace(5) +// CHECK-NEXT: [[AGG_TMP:%.*]] = alloca [[CLASS_A]], align 4, addrspace(5) +// CHECK-NEXT: [[AGG_TMP_ASCAST:%.*]] = addrspacecast [[CLASS_A]] addrspace(5)* [[AGG_TMP]] to %class.A* +// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast [[CLASS_A]] addrspace(5)* [[A]] to %class.A* +// CHECK-NEXT: call void @_ZN1AC1Ev(%class.A* nonnull align 4 dereferenceable(4) [[A_ASCAST]]) +// CHECK-NEXT: [[TMP0:%.*]] = bitcast %class.A* [[AGG_TMP_ASCAST]] to i8* +// CHECK-NEXT: [[TMP1:%.*]] = bitcast %class.A* [[A_ASCAST]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 [[TMP1]], i64 4, i1 false) +// CHECK-NEXT: [[AGG_TMP_ASCAST_ASCAST:%.*]] = addrspacecast %class.A* [[AGG_TMP_ASCAST]] to [[CLASS_A]] addrspace(5)* +// CHECK-NEXT: call void @_Z22func_with_indirect_arg1A([[CLASS_A]] addrspace(5)* [[AGG_TMP_ASCAST_ASCAST]]) +// CHECK-NEXT: call void @_ZN1AD1Ev(%class.A* nonnull align 4 dereferenceable(4) [[AGG_TMP_ASCAST]]) +// CHECK-NEXT: call void @_Z17func_with_ref_argR1A(%class.A* nonnull align 4 dereferenceable(4) [[A_ASCAST]]) +// CHECK-NEXT: call void @_ZN1AD1Ev(%class.A* nonnull align 4 dereferenceable(4) [[A_ASCAST]]) +// CHECK-NEXT: ret void +// void test_indirect_arg_auto() { A a; func_with_indirect_arg(a); func_with_ref_arg(a); } -// CHECK: define{{.*}} void @_Z24test_indirect_arg_globalv() -// CHECK: %agg.tmp = alloca %class.A, align 4, addrspace(5) -// CHECK: %[[r0:.+]] = addrspacecast %class.A addrspace(5)* %agg.tmp to %class.A* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 -// CHECK: %[[r2:.+]] = addrspacecast %class.A* %[[r0]] to %class.A addrspace(5)* -// CHECK: call void @_Z22func_with_indirect_arg1A(%class.A addrspace(5)* %[[r2]]) -// CHECK: call void @_ZN1AD1Ev(%class.A* {{[^,]*}} %[[r0]]) -// CHECK: call void @_Z17func_with_ref_argR1A(%class.A* nonnull align 4 dereferenceable(4) addrspacecast (%class.A addrspace(1)* @g_a to %class.A*)) +// CHECK-LABEL: @_Z24test_indirect_arg_globalv( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[AGG_TMP:%.*]] = alloca [[CLASS_A:%.*]], align 4, addrspace(5) +// CHECK-NEXT: [[AGG_TMP_ASCAST:%.*]] = addrspacecast [[CLASS_A]] addrspace(5)* [[AGG_TMP]] to %class.A* +// CHECK-NEXT: [[TMP0:%.*]] = bitcast %class.A* [[AGG_TMP_ASCAST]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 addrspacecast (i8 addrspace(1)* bitcast ([[CLASS_A]] addrspace(1)* @g_a to i8 addrspace(1)*) to i8*), i64 4, i1 false) +// CHECK-NEXT: [[AGG_TMP_ASCAST_ASCAST:%.*]] = addrspacecast %class.A* [[AGG_TMP_ASCAST]] to [[CLASS_A]] addrspace(5)* +// CHECK-NEXT: call void @_Z22func_with_indirect_arg1A([[CLASS_A]] addrspace(5)* [[AGG_TMP_ASCAST_ASCAST]]) +// CHECK-NEXT: call void @_ZN1AD1Ev(%class.A* nonnull align 4 dereferenceable(4) [[AGG_TMP_ASCAST]]) +// CHECK-NEXT: call void @_Z17func_with_ref_argR1A(%class.A* nonnull align 4 dereferenceable(4) addrspacecast ([[CLASS_A]] addrspace(1)* @g_a to %class.A*)) +// CHECK-NEXT: ret void +// void test_indirect_arg_global() { func_with_indirect_arg(g_a); func_with_ref_arg(g_a); } -// CHECK-LABEL: define{{.*}} void @_Z19func_with_byval_arg1B(%class.B addrspace(5)* byval(%class.B) align 4 %b) -// CHECK: %p = alloca %class.B*, align 8, addrspace(5) -// CHECK: %[[r1:.+]] = addrspacecast %class.B* addrspace(5)* %p to %class.B** -// CHECK: %[[r0:.+]] = addrspacecast %class.B addrspace(5)* %b to %class.B* -// CHECK: store %class.B* %[[r0]], %class.B** %[[r1]], align 8 +// CHECK-LABEL: @_Z19func_with_byval_arg1B( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[P:%.*]] = alloca %class.B*, align 8, addrspace(5) +// CHECK-NEXT: [[P_ASCAST:%.*]] = addrspacecast %class.B* addrspace(5)* [[P]] to %class.B** +// CHECK-NEXT: [[B_ASCAST:%.*]] = addrspacecast [[CLASS_B:%.*]] addrspace(5)* [[B:%.*]] to %class.B* +// CHECK-NEXT: store %class.B* [[B_ASCAST]], %class.B** [[P_ASCAST]], align 8 +// CHECK-NEXT: ret void +// void func_with_byval_arg(B b) { B *p = &b; } -// CHECK-LABEL: define{{.*}} void @_Z19test_byval_arg_autov() -// CHECK: %b = alloca %class.B, align 4, addrspace(5) -// CHECK: %[[r0:.+]] = addrspacecast %class.B addrspace(5)* %b to %class.B* -// CHECK: %agg.tmp = alloca %class.B, align 4, addrspace(5) -// CHECK: %[[r1:.+]] = addrspacecast %class.B addrspace(5)* %agg.tmp to %class.B* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 -// CHECK: %[[r4:.+]] = addrspacecast %class.B* %[[r1]] to %class.B addrspace(5)* -// CHECK: call void @_Z19func_with_byval_arg1B(%class.B addrspace(5)* byval(%class.B) align 4 %[[r4]]) -// CHECK: call void @_Z17func_with_ref_argR1B(%class.B* nonnull align 4 dereferenceable(400) %[[r0]]) +// CHECK-LABEL: @_Z19test_byval_arg_autov( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[B:%.*]] = alloca [[CLASS_B:%.*]], align 4, addrspace(5) +// CHECK-NEXT: [[AGG_TMP:%.*]] = alloca [[CLASS_B]], align 4, addrspace(5) +// CHECK-NEXT: [[AGG_TMP_ASCAST:%.*]] = addrspacecast [[CLASS_B]] addrspace(5)* [[AGG_TMP]] to %class.B* +// CHECK-NEXT: [[B_ASCAST:%.*]] = addrspacecast [[CLASS_B]] addrspace(5)* [[B]] to %class.B* +// CHECK-NEXT: [[TMP0:%.*]] = bitcast %class.B* [[AGG_TMP_ASCAST]] to i8* +// CHECK-NEXT: [[TMP1:%.*]] = bitcast %class.B* [[B_ASCAST]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 [[TMP1]], i64 400, i1 false) +// CHECK-NEXT: [[AGG_TMP_ASCAST_ASCAST:%.*]] = addrspacecast %class.B* [[AGG_TMP_ASCAST]] to [[CLASS_B]] addrspace(5)* +// CHECK-NEXT: call void @_Z19func_with_byval_arg1B([[CLASS_B]] addrspace(5)* byval([[CLASS_B]]) align 4 [[AGG_TMP_ASCAST_ASCAST]]) +// CHECK-NEXT: call void @_Z17func_with_ref_argR1B(%class.B* nonnull align 4 dereferenceable(400) [[B_ASCAST]]) +// CHECK-NEXT: ret void +// void test_byval_arg_auto() { B b; func_with_byval_arg(b); func_with_ref_arg(b); } -// CHECK-LABEL: define{{.*}} void @_Z21test_byval_arg_globalv() -// CHECK: %agg.tmp = alloca %class.B, align 4, addrspace(5) -// CHECK: %[[r0:.+]] = addrspacecast %class.B addrspace(5)* %agg.tmp to %class.B* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 -// CHECK: %[[r2:.+]] = addrspacecast %class.B* %[[r0]] to %class.B addrspace(5)* -// CHECK: call void @_Z19func_with_byval_arg1B(%class.B addrspace(5)* byval(%class.B) align 4 %[[r2]]) -// CHECK: call void @_Z17func_with_ref_argR1B(%class.B* nonnull align 4 dereferenceable(400) addrspacecast (%class.B addrspace(1)* @g_b to %class.B*)) +// CHECK-LABEL: @_Z21test_byval_arg_globalv( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[AGG_TMP:%.*]] = alloca [[CLASS_B:%.*]], align 4, addrspace(5) +// CHECK-NEXT: [[AGG_TMP_ASCAST:%.*]] = addrspacecast [[CLASS_B]] addrspace(5)* [[AGG_TMP]] to %class.B* +// CHECK-NEXT: [[TMP0:%.*]] = bitcast %class.B* [[AGG_TMP_ASCAST]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 addrspacecast (i8 addrspace(1)* bitcast ([[CLASS_B]] addrspace(1)* @g_b to i8 addrspace(1)*) to i8*), i64 400, i1 false) +// CHECK-NEXT: [[AGG_TMP_ASCAST_ASCAST:%.*]] = addrspacecast %class.B* [[AGG_TMP_ASCAST]] to [[CLASS_B]] addrspace(5)* +// CHECK-NEXT: call void @_Z19func_with_byval_arg1B([[CLASS_B]] addrspace(5)* byval([[CLASS_B]]) align 4 [[AGG_TMP_ASCAST_ASCAST]]) +// CHECK-NEXT: call void @_Z17func_with_ref_argR1B(%class.B* nonnull align 4 dereferenceable(400) addrspacecast ([[CLASS_B]] addrspace(1)* @g_b to %class.B*)) +// CHECK-NEXT: ret void +// void test_byval_arg_global() { func_with_byval_arg(g_b); func_with_ref_arg(g_b); diff --git a/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp b/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp --- a/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp +++ b/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp @@ -1,265 +1,352 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 %s -x hip -fcuda-is-device -emit-llvm -O0 -o - \ // RUN: -triple=amdgcn-amd-amdhsa | opt -S | FileCheck %s +// CHECK-LABEL: @_Z29test_non_volatile_parameter32Pj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8, addrspace(5) +// CHECK-NEXT: [[RES:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[RES_ASCAST:%.*]] = addrspacecast i32 addrspace(5)* [[RES]] to i32* +// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[PTR_ADDR]] to i32** +// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32*, i32** [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* [[TMP0]], i32 [[TMP2]], i32 7, i32 2, i1 false) +// CHECK-NEXT: store i32 [[TMP3]], i32* [[RES_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load i32*, i32** [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = load i32*, i32** [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* [[TMP4]], i32 [[TMP6]], i32 7, i32 2, i1 false) +// CHECK-NEXT: store i32 [[TMP7]], i32* [[RES_ASCAST]], align 4 +// CHECK-NEXT: ret void +// __attribute__((device)) void test_non_volatile_parameter32(__UINT32_TYPE__ *ptr) { - // CHECK-LABEL: test_non_volatile_parameter32 __UINT32_TYPE__ res; - // CHECK: %ptr.addr = alloca i32*, align 8, addrspace(5) - // CHECK-NEXT: %ptr.addr.ascast = addrspacecast i32* addrspace(5)* %ptr.addr to i32** - // CHECK-NEXT: %res = alloca i32, align 4, addrspace(5) - // CHECK-NEXT: %res.ascast = addrspacecast i32 addrspace(5)* %res to i32* - // CHECK-NEXT: store i32* %ptr, i32** %ptr.addr.ascast, align 8 - // CHECK-NEXT: %0 = load i32*, i32** %ptr.addr.ascast, align 8 - // CHECK-NEXT: %1 = load i32*, i32** %ptr.addr.ascast, align 8 - // CHECK-NEXT: %2 = load i32, i32* %1, align 4 - // CHECK-NEXT: %3 = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %0, i32 %2, i32 7, i32 2, i1 false) - // CHECK-NEXT: store i32 %3, i32* %res.ascast, align 4 res = __builtin_amdgcn_atomic_inc32(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup"); - // CHECK: %4 = load i32*, i32** %ptr.addr.ascast, align 8 - // CHECK-NEXT: %5 = load i32*, i32** %ptr.addr.ascast, align 8 - // CHECK-NEXT: %6 = load i32, i32* %5, align 4 - // CHECK-NEXT: %7 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %4, i32 %6, i32 7, i32 2, i1 false) - // CHECK-NEXT: store i32 %7, i32* %res.ascast, align 4 res = __builtin_amdgcn_atomic_dec32(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup"); } +// CHECK-LABEL: @_Z29test_non_volatile_parameter64Py( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i64*, align 8, addrspace(5) +// CHECK-NEXT: [[RES:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-NEXT: [[RES_ASCAST:%.*]] = addrspacecast i64 addrspace(5)* [[RES]] to i64* +// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[PTR_ADDR]] to i64** +// CHECK-NEXT: store i64* [[PTR:%.*]], i64** [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i64*, i64** [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i64*, i64** [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* [[TMP0]], i64 [[TMP2]], i32 7, i32 2, i1 false) +// CHECK-NEXT: store i64 [[TMP3]], i64* [[RES_ASCAST]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load i64*, i64** [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = load i64*, i64** [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* [[TMP4]], i64 [[TMP6]], i32 7, i32 2, i1 false) +// CHECK-NEXT: store i64 [[TMP7]], i64* [[RES_ASCAST]], align 8 +// CHECK-NEXT: ret void +// __attribute__((device)) void test_non_volatile_parameter64(__UINT64_TYPE__ *ptr) { - // CHECK-LABEL: test_non_volatile_parameter64 __UINT64_TYPE__ res; - // CHECK: %ptr.addr = alloca i64*, align 8, addrspace(5) - // CHECK-NEXT: %ptr.addr.ascast = addrspacecast i64* addrspace(5)* %ptr.addr to i64** - // CHECK-NEXT: %res = alloca i64, align 8, addrspace(5) - // CHECK-NEXT: %res.ascast = addrspacecast i64 addrspace(5)* %res to i64* - // CHECK-NEXT: store i64* %ptr, i64** %ptr.addr.ascast, align 8 - // CHECK-NEXT: %0 = load i64*, i64** %ptr.addr.ascast, align 8 - // CHECK-NEXT: %1 = load i64*, i64** %ptr.addr.ascast, align 8 - // CHECK-NEXT: %2 = load i64, i64* %1, align 8 - // CHECK-NEXT: %3 = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %0, i64 %2, i32 7, i32 2, i1 false) - // CHECK-NEXT: store i64 %3, i64* %res.ascast, align 8 res = __builtin_amdgcn_atomic_inc64(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup"); - // CHECK: %4 = load i64*, i64** %ptr.addr.ascast, align 8 - // CHECK-NEXT: %5 = load i64*, i64** %ptr.addr.ascast, align 8 - // CHECK-NEXT: %6 = load i64, i64* %5, align 8 - // CHECK-NEXT: %7 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %4, i64 %6, i32 7, i32 2, i1 false) - // CHECK-NEXT: store i64 %7, i64* %res.ascast, align 8 res = __builtin_amdgcn_atomic_dec64(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup"); } +// CHECK-LABEL: @_Z25test_volatile_parameter32PVj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8, addrspace(5) +// CHECK-NEXT: [[RES:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[RES_ASCAST:%.*]] = addrspacecast i32 addrspace(5)* [[RES]] to i32* +// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[PTR_ADDR]] to i32** +// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32*, i32** [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* [[TMP0]], i32 [[TMP2]], i32 7, i32 2, i1 true) +// CHECK-NEXT: store i32 [[TMP3]], i32* [[RES_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load i32*, i32** [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = load i32*, i32** [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = load volatile i32, i32* [[TMP5]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* [[TMP4]], i32 [[TMP6]], i32 7, i32 2, i1 true) +// CHECK-NEXT: store i32 [[TMP7]], i32* [[RES_ASCAST]], align 4 +// CHECK-NEXT: ret void +// __attribute__((device)) void test_volatile_parameter32(volatile __UINT32_TYPE__ *ptr) { - // CHECK-LABEL: test_volatile_parameter32 __UINT32_TYPE__ res; - // CHECK: %ptr.addr = alloca i32*, align 8, addrspace(5) - // CHECK-NEXT: %ptr.addr.ascast = addrspacecast i32* addrspace(5)* %ptr.addr to i32** - // CHECK-NEXT: %res = alloca i32, align 4, addrspace(5) - // CHECK-NEXT: %res.ascast = addrspacecast i32 addrspace(5)* %res to i32* - // CHECK-NEXT: store i32* %ptr, i32** %ptr.addr.ascast, align 8 - // CHECK-NEXT: %0 = load i32*, i32** %ptr.addr.ascast, align 8 - // CHECK-NEXT: %1 = load i32*, i32** %ptr.addr.ascast, align 8 - // CHECK-NEXT: %2 = load volatile i32, i32* %1, align 4 - // CHECK-NEXT: %3 = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %0, i32 %2, i32 7, i32 2, i1 true) - // CHECK-NEXT: store i32 %3, i32* %res.ascast, align 4 res = __builtin_amdgcn_atomic_inc32(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup"); - // CHECK: %4 = load i32*, i32** %ptr.addr.ascast, align 8 - // CHECK-NEXT: %5 = load i32*, i32** %ptr.addr.ascast, align 8 - // CHECK-NEXT: %6 = load volatile i32, i32* %5, align 4 - // CHECK-NEXT: %7 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %4, i32 %6, i32 7, i32 2, i1 true) - // CHECK-NEXT: store i32 %7, i32* %res.ascast, align 4 res = __builtin_amdgcn_atomic_dec32(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup"); } +// CHECK-LABEL: @_Z25test_volatile_parameter64PVy( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i64*, align 8, addrspace(5) +// CHECK-NEXT: [[RES:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-NEXT: [[RES_ASCAST:%.*]] = addrspacecast i64 addrspace(5)* [[RES]] to i64* +// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[PTR_ADDR]] to i64** +// CHECK-NEXT: store i64* [[PTR:%.*]], i64** [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i64*, i64** [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i64*, i64** [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load volatile i64, i64* [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* [[TMP0]], i64 [[TMP2]], i32 7, i32 2, i1 true) +// CHECK-NEXT: store i64 [[TMP3]], i64* [[RES_ASCAST]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load i64*, i64** [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = load i64*, i64** [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = load volatile i64, i64* [[TMP5]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* [[TMP4]], i64 [[TMP6]], i32 7, i32 2, i1 true) +// CHECK-NEXT: store i64 [[TMP7]], i64* [[RES_ASCAST]], align 8 +// CHECK-NEXT: ret void +// __attribute__((device)) void test_volatile_parameter64(volatile __UINT64_TYPE__ *ptr) { - // CHECK-LABEL: test_volatile_parameter64 __UINT64_TYPE__ res; - // CHECK: %ptr.addr = alloca i64*, align 8, addrspace(5) - // CHECK-NEXT: %ptr.addr.ascast = addrspacecast i64* addrspace(5)* %ptr.addr to i64** - // CHECK-NEXT: %res = alloca i64, align 8, addrspace(5) - // CHECK-NEXT: %res.ascast = addrspacecast i64 addrspace(5)* %res to i64* - // CHECK-NEXT: store i64* %ptr, i64** %ptr.addr.ascast, align 8 - // CHECK-NEXT: %0 = load i64*, i64** %ptr.addr.ascast, align 8 - // CHECK-NEXT: %1 = load i64*, i64** %ptr.addr.ascast, align 8 - // CHECK-NEXT: %2 = load volatile i64, i64* %1, align 8 - // CHECK-NEXT: %3 = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %0, i64 %2, i32 7, i32 2, i1 true) - // CHECK-NEXT: store i64 %3, i64* %res.ascast, align 8 res = __builtin_amdgcn_atomic_inc64(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup"); - // CHECK: %4 = load i64*, i64** %ptr.addr.ascast, align 8 - // CHECK-NEXT: %5 = load i64*, i64** %ptr.addr.ascast, align 8 - // CHECK-NEXT: %6 = load volatile i64, i64* %5, align 8 - // CHECK-NEXT: %7 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %4, i64 %6, i32 7, i32 2, i1 true) - // CHECK-NEXT: store i64 %7, i64* %res.ascast, align 8 res = __builtin_amdgcn_atomic_dec64(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup"); } +// CHECK-LABEL: @_Z13test_shared32v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), align 4 +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), i32 [[TMP0]], i32 7, i32 2, i1 false) +// CHECK-NEXT: store i32 [[TMP1]], i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), align 4 +// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), i32 [[TMP2]], i32 7, i32 2, i1 false) +// CHECK-NEXT: store i32 [[TMP3]], i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), align 4 +// CHECK-NEXT: ret void +// __attribute__((device)) void test_shared32() { - // CHECK-LABEL: test_shared32 __attribute__((shared)) __UINT32_TYPE__ val; - // CHECK: %0 = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), align 4 - // CHECK-NEXT: %1 = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), i32 %0, i32 7, i32 2, i1 false) - // CHECK-NEXT: store i32 %1, i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), align 4 val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_SEQ_CST, "workgroup"); - // CHECK: %2 = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), align 4 - // CHECK-NEXT: %3 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), i32 %2, i32 7, i32 2, i1 false) - // CHECK-NEXT: store i32 %3, i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), align 4 val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST, "workgroup"); } +// CHECK-LABEL: @_Z13test_shared64v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), align 8 +// CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), i64 [[TMP0]], i32 7, i32 2, i1 false) +// CHECK-NEXT: store i64 [[TMP1]], i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), align 8 +// CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), i64 [[TMP2]], i32 7, i32 2, i1 false) +// CHECK-NEXT: store i64 [[TMP3]], i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), align 8 +// CHECK-NEXT: ret void +// __attribute__((device)) void test_shared64() { - // CHECK-LABEL: test_shared64 __attribute__((shared)) __UINT64_TYPE__ val; - // CHECK: %0 = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), align 8 - // CHECK-NEXT: %1 = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), i64 %0, i32 7, i32 2, i1 false) - // CHECK-NEXT: store i64 %1, i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), align 8 val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_SEQ_CST, "workgroup"); - // CHECK: %2 = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), align 8 - // CHECK-NEXT: %3 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), i64 %2, i32 7, i32 2, i1 false) - // CHECK-NEXT: store i64 %3, i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), align 8 val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST, "workgroup"); } __attribute__((device)) __UINT32_TYPE__ global_val32; +// CHECK-LABEL: @_Z13test_global32v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), align 4 +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), i32 [[TMP0]], i32 7, i32 2, i1 false) +// CHECK-NEXT: store i32 [[TMP1]], i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), align 4 +// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), i32 [[TMP2]], i32 7, i32 2, i1 false) +// CHECK-NEXT: store i32 [[TMP3]], i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), align 4 +// CHECK-NEXT: ret void +// __attribute__((device)) void test_global32() { - // CHECK-LABEL: test_global32 - // CHECK: %0 = load i32, i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), align 4 - // CHECK-NEXT: %1 = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), i32 %0, i32 7, i32 2, i1 false) - // CHECK-NEXT: store i32 %1, i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), align 4 global_val32 = __builtin_amdgcn_atomic_inc32(&global_val32, global_val32, __ATOMIC_SEQ_CST, "workgroup"); - // CHECK: %2 = load i32, i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), align 4 - // CHECK-NEXT: %3 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), i32 %2, i32 7, i32 2, i1 false) - // CHECK-NEXT: store i32 %3, i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), align 4 global_val32 = __builtin_amdgcn_atomic_dec32(&global_val32, global_val32, __ATOMIC_SEQ_CST, "workgroup"); } __attribute__((device)) __UINT64_TYPE__ global_val64; +// CHECK-LABEL: @_Z13test_global64v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), align 8 +// CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), i64 [[TMP0]], i32 7, i32 2, i1 false) +// CHECK-NEXT: store i64 [[TMP1]], i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), align 8 +// CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), i64 [[TMP2]], i32 7, i32 2, i1 false) +// CHECK-NEXT: store i64 [[TMP3]], i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), align 8 +// CHECK-NEXT: ret void +// __attribute__((device)) void test_global64() { - // CHECK-LABEL: test_global64 - // CHECK: %0 = load i64, i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), align 8 - // CHECK-NEXT: %1 = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), i64 %0, i32 7, i32 2, i1 false) - // CHECK-NEXT: store i64 %1, i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), align 8 global_val64 = __builtin_amdgcn_atomic_inc64(&global_val64, global_val64, __ATOMIC_SEQ_CST, "workgroup"); - // CHECK: %2 = load i64, i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), align 8 - // CHECK-NEXT: %3 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), i64 %2, i32 7, i32 2, i1 false) - // CHECK-NEXT: store i64 %3, i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), align 8 global_val64 = __builtin_amdgcn_atomic_dec64(&global_val64, global_val64, __ATOMIC_SEQ_CST, "workgroup"); } __attribute__((constant)) __UINT32_TYPE__ cval32; +// CHECK-LABEL: @_Z15test_constant32v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[LOCAL_VAL:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[LOCAL_VAL_ASCAST:%.*]] = addrspacecast i32 addrspace(5)* [[LOCAL_VAL]] to i32* +// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(4)* @cval32 to i32*), align 4 +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(4)* @cval32 to i32*), i32 [[TMP0]], i32 7, i32 2, i1 false) +// CHECK-NEXT: store i32 [[TMP1]], i32* [[LOCAL_VAL_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(4)* @cval32 to i32*), align 4 +// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(4)* @cval32 to i32*), i32 [[TMP2]], i32 7, i32 2, i1 false) +// CHECK-NEXT: store i32 [[TMP3]], i32* [[LOCAL_VAL_ASCAST]], align 4 +// CHECK-NEXT: ret void +// __attribute__((device)) void test_constant32() { - // CHECK-LABEL: test_constant32 __UINT32_TYPE__ local_val; - // CHECK: %0 = load i32, i32* addrspacecast (i32 addrspace(4)* @cval32 to i32*), align 4 - // CHECK-NEXT: %1 = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(4)* @cval32 to i32*), i32 %0, i32 7, i32 2, i1 false) - // CHECK-NEXT: store i32 %1, i32* %local_val.ascast, align 4 local_val = __builtin_amdgcn_atomic_inc32(&cval32, cval32, __ATOMIC_SEQ_CST, "workgroup"); - // CHECK: %2 = load i32, i32* addrspacecast (i32 addrspace(4)* @cval32 to i32*), align 4 - // CHECK-NEXT: %3 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(4)* @cval32 to i32*), i32 %2, i32 7, i32 2, i1 false) - // CHECK-NEXT: store i32 %3, i32* %local_val.ascast, align 4 local_val = __builtin_amdgcn_atomic_dec32(&cval32, cval32, __ATOMIC_SEQ_CST, "workgroup"); } __attribute__((constant)) __UINT64_TYPE__ cval64; +// CHECK-LABEL: @_Z15test_constant64v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[LOCAL_VAL:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-NEXT: [[LOCAL_VAL_ASCAST:%.*]] = addrspacecast i64 addrspace(5)* [[LOCAL_VAL]] to i64* +// CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* addrspacecast (i64 addrspace(4)* @cval64 to i64*), align 8 +// CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(4)* @cval64 to i64*), i64 [[TMP0]], i32 7, i32 2, i1 false) +// CHECK-NEXT: store i64 [[TMP1]], i64* [[LOCAL_VAL_ASCAST]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* addrspacecast (i64 addrspace(4)* @cval64 to i64*), align 8 +// CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(4)* @cval64 to i64*), i64 [[TMP2]], i32 7, i32 2, i1 false) +// CHECK-NEXT: store i64 [[TMP3]], i64* [[LOCAL_VAL_ASCAST]], align 8 +// CHECK-NEXT: ret void +// __attribute__((device)) void test_constant64() { - // CHECK-LABEL: test_constant64 __UINT64_TYPE__ local_val; - // CHECK: %0 = load i64, i64* addrspacecast (i64 addrspace(4)* @cval64 to i64*), align 8 - // CHECK-NEXT: %1 = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(4)* @cval64 to i64*), i64 %0, i32 7, i32 2, i1 false) - // CHECK-NEXT: store i64 %1, i64* %local_val.ascast, align 8 local_val = __builtin_amdgcn_atomic_inc64(&cval64, cval64, __ATOMIC_SEQ_CST, "workgroup"); - // CHECK: %2 = load i64, i64* addrspacecast (i64 addrspace(4)* @cval64 to i64*), align 8 - // CHECK-NEXT: %3 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(4)* @cval64 to i64*), i64 %2, i32 7, i32 2, i1 false) - // CHECK-NEXT: store i64 %3, i64* %local_val.ascast, align 8 local_val = __builtin_amdgcn_atomic_dec64(&cval64, cval64, __ATOMIC_SEQ_CST, "workgroup"); } +// CHECK-LABEL: @_Z12test_order32v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), align 4 +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 [[TMP0]], i32 2, i32 2, i1 false) +// CHECK-NEXT: store i32 [[TMP1]], i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), align 4 +// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 [[TMP2]], i32 4, i32 2, i1 false) +// CHECK-NEXT: store i32 [[TMP3]], i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), align 4 +// CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 [[TMP4]], i32 4, i32 2, i1 false) +// CHECK-NEXT: store i32 [[TMP5]], i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), align 4 +// CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 [[TMP6]], i32 5, i32 2, i1 false) +// CHECK-NEXT: store i32 [[TMP7]], i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), align 4 +// CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), align 4 +// CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 [[TMP8]], i32 6, i32 2, i1 false) +// CHECK-NEXT: store i32 [[TMP9]], i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), align 4 +// CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), align 4 +// CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 [[TMP10]], i32 7, i32 2, i1 false) +// CHECK-NEXT: store i32 [[TMP11]], i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), align 4 +// CHECK-NEXT: ret void +// __attribute__((device)) void test_order32() { - // CHECK-LABEL: test_order32 __attribute__((shared)) __UINT32_TYPE__ val; - // CHECK: call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 {{.*}}, i32 2, i32 2, i1 false) val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_RELAXED, "workgroup"); - // CHECK: call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 {{.*}}, i32 4, i32 2, i1 false) val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_CONSUME, "workgroup"); - // CHECK: call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 {{.*}}, i32 4, i32 2, i1 false) val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_ACQUIRE, "workgroup"); - // CHECK: call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 {{.*}}, i32 5, i32 2, i1 false) val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_RELEASE, "workgroup"); - // CHECK: call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 {{.*}}, i32 6, i32 2, i1 false) val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_ACQ_REL, "workgroup"); - // CHECK: call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 {{.*}}, i32 7, i32 2, i1 false) val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST, "workgroup"); } +// CHECK-LABEL: @_Z12test_order64v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), align 8 +// CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 [[TMP0]], i32 2, i32 2, i1 false) +// CHECK-NEXT: store i64 [[TMP1]], i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), align 8 +// CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 [[TMP2]], i32 4, i32 2, i1 false) +// CHECK-NEXT: store i64 [[TMP3]], i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), align 8 +// CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 [[TMP4]], i32 4, i32 2, i1 false) +// CHECK-NEXT: store i64 [[TMP5]], i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), align 8 +// CHECK-NEXT: [[TMP6:%.*]] = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), align 8 +// CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 [[TMP6]], i32 5, i32 2, i1 false) +// CHECK-NEXT: store i64 [[TMP7]], i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), align 8 +// CHECK-NEXT: [[TMP8:%.*]] = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), align 8 +// CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 [[TMP8]], i32 6, i32 2, i1 false) +// CHECK-NEXT: store i64 [[TMP9]], i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), align 8 +// CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 [[TMP10]], i32 7, i32 2, i1 false) +// CHECK-NEXT: store i64 [[TMP11]], i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), align 8 +// CHECK-NEXT: ret void +// __attribute__((device)) void test_order64() { - // CHECK-LABEL: test_order64 __attribute__((shared)) __UINT64_TYPE__ val; - // CHECK: call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 {{.*}}, i32 2, i32 2, i1 false) val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_RELAXED, "workgroup"); - // CHECK: call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 {{.*}}, i32 4, i32 2, i1 false) val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_CONSUME, "workgroup"); - // CHECK: call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 {{.*}}, i32 4, i32 2, i1 false) val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_ACQUIRE, "workgroup"); - // CHECK: call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 {{.*}}, i32 5, i32 2, i1 false) val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_RELEASE, "workgroup"); - // CHECK: call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 {{.*}}, i32 6, i32 2, i1 false) val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_ACQ_REL, "workgroup"); - // CHECK: call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 {{.*}}, i32 7, i32 2, i1 false) val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST, "workgroup"); } +// CHECK-LABEL: @_Z12test_scope32v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), align 4 +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), i32 [[TMP0]], i32 7, i32 1, i1 false) +// CHECK-NEXT: store i32 [[TMP1]], i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), align 4 +// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), i32 [[TMP2]], i32 7, i32 2, i1 false) +// CHECK-NEXT: store i32 [[TMP3]], i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), align 4 +// CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), i32 [[TMP4]], i32 7, i32 3, i1 false) +// CHECK-NEXT: store i32 [[TMP5]], i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), align 4 +// CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), i32 [[TMP6]], i32 7, i32 4, i1 false) +// CHECK-NEXT: store i32 [[TMP7]], i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), align 4 +// CHECK-NEXT: ret void +// __attribute__((device)) void test_scope32() { - // CHECK-LABEL: test_scope32 __attribute__((shared)) __UINT32_TYPE__ val; - // CHECK: %1 = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), i32 %0, i32 7, i32 1, i1 false) val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_SEQ_CST, ""); - // CHECK: %3 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), i32 %2, i32 7, i32 2, i1 false) val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST, "workgroup"); - // CHECK: %5 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), i32 %4, i32 7, i32 3, i1 false) val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST, "agent"); - // CHECK: %7 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), i32 %6, i32 7, i32 4, i1 false) val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST, "wavefront"); } +// CHECK-LABEL: @_Z12test_scope64v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), align 8 +// CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), i64 [[TMP0]], i32 7, i32 1, i1 false) +// CHECK-NEXT: store i64 [[TMP1]], i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), align 8 +// CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), i64 [[TMP2]], i32 7, i32 2, i1 false) +// CHECK-NEXT: store i64 [[TMP3]], i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), align 8 +// CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), i64 [[TMP4]], i32 7, i32 3, i1 false) +// CHECK-NEXT: store i64 [[TMP5]], i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), align 8 +// CHECK-NEXT: [[TMP6:%.*]] = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), align 8 +// CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), i64 [[TMP6]], i32 7, i32 4, i1 false) +// CHECK-NEXT: store i64 [[TMP7]], i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), align 8 +// CHECK-NEXT: ret void +// __attribute__((device)) void test_scope64() { - // CHECK-LABEL: test_scope64 __attribute__((shared)) __UINT64_TYPE__ val; - // CHECK: %1 = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), i64 %0, i32 7, i32 1, i1 false) val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_SEQ_CST, ""); - // CHECK: %3 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), i64 %2, i32 7, i32 2, i1 false) val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST, "workgroup"); - // CHECK: %5 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), i64 %4, i32 7, i32 3, i1 false) val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST, "agent"); - // CHECK: %7 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), i64 %6, i32 7, i32 4, i1 false) val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST, "wavefront"); } diff --git a/clang/test/CodeGenCXX/vla.cpp b/clang/test/CodeGenCXX/vla.cpp --- a/clang/test/CodeGenCXX/vla.cpp +++ b/clang/test/CodeGenCXX/vla.cpp @@ -18,18 +18,18 @@ // rdar://problem/9506377 void test0(void *array, int n) { // CHECK-LABEL: define{{.*}} void @_Z5test0Pvi( - // X64: [[ARRAY:%.*]] = alloca i8*, align 8 // AMDGCN: [[ARRAY0:%.*]] = alloca i8*, align 8, addrspace(5) + // AMDGCN-NEXT: [[N0:%.*]] = alloca i32, align 4, addrspace(5) + // AMDGCN-NEXT: [[REF0:%.*]] = alloca i16*, align 8, addrspace(5) + // AMDGCN-NEXT: [[S0:%.*]] = alloca i16, align 2, addrspace(5) + // AMDGCN-NEXT: [[S:%.*]] = addrspacecast i16 addrspace(5)* [[S0]] to i16* + // AMDGCN-NEXT: [[REF:%.*]] = addrspacecast i16* addrspace(5)* [[REF0]] to i16** + // AMDGCN-NEXT: [[N:%.*]] = addrspacecast i32 addrspace(5)* [[N0]] to i32* // AMDGCN-NEXT: [[ARRAY:%.*]] = addrspacecast i8* addrspace(5)* [[ARRAY0]] to i8** + // X64: [[ARRAY:%.*]] = alloca i8*, align 8 // X64-NEXT: [[N:%.*]] = alloca i32, align 4 - // AMDGCN: [[N0:%.*]] = alloca i32, align 4, addrspace(5) - // AMDGCN-NEXT: [[N:%.*]] = addrspacecast i32 addrspace(5)* [[N0]] to i32* // X64-NEXT: [[REF:%.*]] = alloca i16*, align 8 - // AMDGCN: [[REF0:%.*]] = alloca i16*, align 8, addrspace(5) - // AMDGCN-NEXT: [[REF:%.*]] = addrspacecast i16* addrspace(5)* [[REF0]] to i16** // X64-NEXT: [[S:%.*]] = alloca i16, align 2 - // AMDGCN: [[S0:%.*]] = alloca i16, align 2, addrspace(5) - // AMDGCN-NEXT: [[S:%.*]] = addrspacecast i16 addrspace(5)* [[S0]] to i16* // CHECK-NEXT: store i8* // CHECK-NEXT: store i32 diff --git a/clang/test/CodeGenSYCL/address-space-deduction.cpp b/clang/test/CodeGenSYCL/address-space-deduction.cpp --- a/clang/test/CodeGenSYCL/address-space-deduction.cpp +++ b/clang/test/CodeGenSYCL/address-space-deduction.cpp @@ -1,73 +1,129 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // RUN: %clang_cc1 -triple spir64 -fsycl-is-device -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s -// CHECK: @_ZZ4testvE3foo = internal addrspace(1) constant i32 66, align 4 -// CHECK: @[[STR:[.a-zA-Z0-9_]+]] = private unnamed_addr addrspace(1) constant [14 x i8] c"Hello, world!\00", align 1 -// CHECK-LABEL: @_Z4testv +// CHECK-LABEL: @_Z4testv( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[PPTR:%.*]] = alloca i32 addrspace(4)*, align 8 +// CHECK-NEXT: [[IS_I_PTR:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[VAR23:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[CP:%.*]] = alloca i8 addrspace(4)*, align 8 +// CHECK-NEXT: [[ARR:%.*]] = alloca [42 x i32], align 4 +// CHECK-NEXT: [[CPP:%.*]] = alloca i8 addrspace(4)*, align 8 +// CHECK-NEXT: [[APTR:%.*]] = alloca i32 addrspace(4)*, align 8 +// CHECK-NEXT: [[STR:%.*]] = alloca i8 addrspace(4)*, align 8 +// CHECK-NEXT: [[PHI_STR:%.*]] = alloca i8 addrspace(4)*, align 8 +// CHECK-NEXT: [[SELECT_NULL:%.*]] = alloca i8 addrspace(4)*, align 8 +// CHECK-NEXT: [[SELECT_STR_TRIVIAL1:%.*]] = alloca i8 addrspace(4)*, align 8 +// CHECK-NEXT: [[SELECT_STR_TRIVIAL2:%.*]] = alloca i8 addrspace(4)*, align 8 +// CHECK-NEXT: [[SELECT_STR_TRIVIAL2_ASCAST:%.*]] = addrspacecast i8 addrspace(4)** [[SELECT_STR_TRIVIAL2]] to i8 addrspace(4)* addrspace(4)* +// CHECK-NEXT: [[SELECT_STR_TRIVIAL1_ASCAST:%.*]] = addrspacecast i8 addrspace(4)** [[SELECT_STR_TRIVIAL1]] to i8 addrspace(4)* addrspace(4)* +// CHECK-NEXT: [[SELECT_NULL_ASCAST:%.*]] = addrspacecast i8 addrspace(4)** [[SELECT_NULL]] to i8 addrspace(4)* addrspace(4)* +// CHECK-NEXT: [[PHI_STR_ASCAST:%.*]] = addrspacecast i8 addrspace(4)** [[PHI_STR]] to i8 addrspace(4)* addrspace(4)* +// CHECK-NEXT: [[STR_ASCAST:%.*]] = addrspacecast i8 addrspace(4)** [[STR]] to i8 addrspace(4)* addrspace(4)* +// CHECK-NEXT: [[APTR_ASCAST:%.*]] = addrspacecast i32 addrspace(4)** [[APTR]] to i32 addrspace(4)* addrspace(4)* +// CHECK-NEXT: [[CPP_ASCAST:%.*]] = addrspacecast i8 addrspace(4)** [[CPP]] to i8 addrspace(4)* addrspace(4)* +// CHECK-NEXT: [[ARR_ASCAST:%.*]] = addrspacecast [42 x i32]* [[ARR]] to [42 x i32] addrspace(4)* +// CHECK-NEXT: [[CP_ASCAST:%.*]] = addrspacecast i8 addrspace(4)** [[CP]] to i8 addrspace(4)* addrspace(4)* +// CHECK-NEXT: [[VAR23_ASCAST:%.*]] = addrspacecast i32* [[VAR23]] to i32 addrspace(4)* +// CHECK-NEXT: [[IS_I_PTR_ASCAST:%.*]] = addrspacecast i8* [[IS_I_PTR]] to i8 addrspace(4)* +// CHECK-NEXT: [[PPTR_ASCAST:%.*]] = addrspacecast i32 addrspace(4)** [[PPTR]] to i32 addrspace(4)* addrspace(4)* +// CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast i32* [[I]] to i32 addrspace(4)* +// CHECK-NEXT: store i32 0, i32 addrspace(4)* [[I_ASCAST]], align 4 +// CHECK-NEXT: store i32 addrspace(4)* [[I_ASCAST]], i32 addrspace(4)* addrspace(4)* [[PPTR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(4)* [[PPTR_ASCAST]], align 8 +// CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 addrspace(4)* [[TMP0]], [[I_ASCAST]] +// CHECK-NEXT: [[FROMBOOL:%.*]] = zext i1 [[CMP]] to i8 +// CHECK-NEXT: store i8 [[FROMBOOL]], i8 addrspace(4)* [[IS_I_PTR_ASCAST]], align 1 +// CHECK-NEXT: [[TMP1:%.*]] = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(4)* [[PPTR_ASCAST]], align 8 +// CHECK-NEXT: store i32 66, i32 addrspace(4)* [[TMP1]], align 4 +// CHECK-NEXT: store i32 23, i32 addrspace(4)* [[VAR23_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 addrspace(4)* [[VAR23_ASCAST]] to i8 addrspace(4)* +// CHECK-NEXT: store i8 addrspace(4)* [[TMP2]], i8 addrspace(4)* addrspace(4)* [[CP_ASCAST]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* [[CP_ASCAST]], align 8 +// CHECK-NEXT: store i8 41, i8 addrspace(4)* [[TMP3]], align 1 +// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [42 x i32], [42 x i32] addrspace(4)* [[ARR_ASCAST]], i64 0, i64 0 +// CHECK-NEXT: [[TMP4:%.*]] = bitcast i32 addrspace(4)* [[ARRAYDECAY]] to i8 addrspace(4)* +// CHECK-NEXT: store i8 addrspace(4)* [[TMP4]], i8 addrspace(4)* addrspace(4)* [[CPP_ASCAST]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* [[CPP_ASCAST]], align 8 +// CHECK-NEXT: store i8 43, i8 addrspace(4)* [[TMP5]], align 1 +// CHECK-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [42 x i32], [42 x i32] addrspace(4)* [[ARR_ASCAST]], i64 0, i64 0 +// CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32 addrspace(4)* [[ARRAYDECAY1]], i64 10 +// CHECK-NEXT: store i32 addrspace(4)* [[ADD_PTR]], i32 addrspace(4)* addrspace(4)* [[APTR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(4)* [[APTR_ASCAST]], align 8 +// CHECK-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [42 x i32], [42 x i32] addrspace(4)* [[ARR_ASCAST]], i64 0, i64 0 +// CHECK-NEXT: [[ADD_PTR3:%.*]] = getelementptr inbounds i32, i32 addrspace(4)* [[ARRAYDECAY2]], i64 168 +// CHECK-NEXT: [[CMP4:%.*]] = icmp ult i32 addrspace(4)* [[TMP6]], [[ADD_PTR3]] +// CHECK-NEXT: br i1 [[CMP4]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +// CHECK: if.then: +// CHECK-NEXT: [[TMP7:%.*]] = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(4)* [[APTR_ASCAST]], align 8 +// CHECK-NEXT: store i32 44, i32 addrspace(4)* [[TMP7]], align 4 +// CHECK-NEXT: br label [[IF_END]] +// CHECK: if.end: +// CHECK-NEXT: store i8 addrspace(4)* getelementptr inbounds ([14 x i8], [14 x i8] addrspace(4)* addrspacecast ([14 x i8] addrspace(1)* @.str to [14 x i8] addrspace(4)*), i64 0, i64 0), i8 addrspace(4)* addrspace(4)* [[STR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* [[STR_ASCAST]], align 8 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[TMP8]], i64 0 +// CHECK-NEXT: [[TMP9:%.*]] = load i8, i8 addrspace(4)* [[ARRAYIDX]], align 1 +// CHECK-NEXT: [[CONV:%.*]] = sext i8 [[TMP9]] to i32 +// CHECK-NEXT: store i32 [[CONV]], i32 addrspace(4)* [[I_ASCAST]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = load i32, i32 addrspace(4)* [[I_ASCAST]], align 4 +// CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], 2 +// CHECK-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK: cond.true: +// CHECK-NEXT: [[TMP11:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* [[STR_ASCAST]], align 8 +// CHECK-NEXT: br label [[COND_END:%.*]] +// CHECK: cond.false: +// CHECK-NEXT: br label [[COND_END]] +// CHECK: cond.end: +// CHECK-NEXT: [[COND:%.*]] = phi i8 addrspace(4)* [ [[TMP11]], [[COND_TRUE]] ], [ getelementptr inbounds ([21 x i8], [21 x i8] addrspace(4)* addrspacecast ([21 x i8] addrspace(1)* @.str.1 to [21 x i8] addrspace(4)*), i64 0, i64 0), [[COND_FALSE]] ] +// CHECK-NEXT: store i8 addrspace(4)* [[COND]], i8 addrspace(4)* addrspace(4)* [[PHI_STR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP12:%.*]] = load i32, i32 addrspace(4)* [[I_ASCAST]], align 4 +// CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], 2 +// CHECK-NEXT: [[TMP13:%.*]] = zext i1 [[CMP6]] to i64 +// CHECK-NEXT: [[COND7:%.*]] = select i1 [[CMP6]], i8 addrspace(4)* getelementptr inbounds ([24 x i8], [24 x i8] addrspace(4)* addrspacecast ([24 x i8] addrspace(1)* @.str.2 to [24 x i8] addrspace(4)*), i64 0, i64 0), i8 addrspace(4)* null +// CHECK-NEXT: store i8 addrspace(4)* [[COND7]], i8 addrspace(4)* addrspace(4)* [[SELECT_NULL_ASCAST]], align 8 +// CHECK-NEXT: [[TMP14:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* [[STR_ASCAST]], align 8 +// CHECK-NEXT: store i8 addrspace(4)* [[TMP14]], i8 addrspace(4)* addrspace(4)* [[SELECT_STR_TRIVIAL1_ASCAST]], align 8 +// CHECK-NEXT: store i8 addrspace(4)* getelementptr inbounds ([21 x i8], [21 x i8] addrspace(4)* addrspacecast ([21 x i8] addrspace(1)* @.str.1 to [21 x i8] addrspace(4)*), i64 0, i64 0), i8 addrspace(4)* addrspace(4)* [[SELECT_STR_TRIVIAL2_ASCAST]], align 8 +// CHECK-NEXT: ret void +// void test() { static const int foo = 0x42; - // CHECK: %i.ascast = addrspacecast i32* %i to i32 addrspace(4)* - // CHECK: %[[ARR:[a-zA-Z0-9]+]] = alloca [42 x i32] - // CHECK: %[[ARR]].ascast = addrspacecast [42 x i32]* %[[ARR]] to [42 x i32] addrspace(4)* int i = 0; int *pptr = &i; - // CHECK: store i32 addrspace(4)* %i.ascast, i32 addrspace(4)* addrspace(4)* %pptr.ascast bool is_i_ptr = (pptr == &i); - // CHECK: %[[VALPPTR:[0-9]+]] = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(4)* %pptr.ascast - // CHECK: %cmp{{[0-9]*}} = icmp eq i32 addrspace(4)* %[[VALPPTR]], %i.ascast *pptr = foo; int var23 = 23; char *cp = (char *)&var23; *cp = 41; - // CHECK: store i32 23, i32 addrspace(4)* %[[VAR:[a-zA-Z0-9.]+]] - // CHECK: [[VARCAST:%.*]] = bitcast i32 addrspace(4)* %[[VAR]] to i8 addrspace(4)* - // CHECK: store i8 addrspace(4)* [[VARCAST]], i8 addrspace(4)* addrspace(4)* %{{.*}} int arr[42]; char *cpp = (char *)arr; *cpp = 43; - // CHECK: [[ARRDECAY:%.*]] = getelementptr inbounds [42 x i32], [42 x i32] addrspace(4)* %[[ARR]].ascast, i64 0, i64 0 - // CHECK: [[ARRCAST:%.*]] = bitcast i32 addrspace(4)* [[ARRDECAY]] to i8 addrspace(4)* - // CHECK: store i8 addrspace(4)* [[ARRCAST]], i8 addrspace(4)* addrspace(4)* %{{.*}} int *aptr = arr + 10; if (aptr < arr + sizeof(arr)) *aptr = 44; - // CHECK: %[[VALAPTR:.*]] = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(4)* %aptr.ascast - // CHECK: %[[ARRDCY2:.*]] = getelementptr inbounds [42 x i32], [42 x i32] addrspace(4)* %[[ARR]].ascast, i64 0, i64 0 - // CHECK: %[[ADDPTR:.*]] = getelementptr inbounds i32, i32 addrspace(4)* %[[ARRDCY2]], i64 168 - // CHECK: %cmp{{[0-9]+}} = icmp ult i32 addrspace(4)* %[[VALAPTR]], %[[ADDPTR]] const char *str = "Hello, world!"; - // CHECK: store i8 addrspace(4)* getelementptr inbounds ([14 x i8], [14 x i8] addrspace(4)* addrspacecast ([14 x i8] addrspace(1)* @[[STR]] to [14 x i8] addrspace(4)*), i64 0, i64 0), i8 addrspace(4)* addrspace(4)* %[[STRVAL:[a-zA-Z0-9]+]].ascast, align 8 i = str[0]; const char *phi_str = i > 2 ? str : "Another hello world!"; (void)phi_str; - // CHECK: %[[COND:[a-zA-Z0-9]+]] = icmp sgt i32 %{{.*}}, 2 - // CHECK: br i1 %[[COND]], label %[[CONDTRUE:[.a-zA-Z0-9]+]], label %[[CONDFALSE:[.a-zA-Z0-9]+]] - // CHECK: [[CONDTRUE]]: - // CHECK-NEXT: %[[VALTRUE:[a-zA-Z0-9]+]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %str.ascast - // CHECK-NEXT: br label %[[CONDEND:[.a-zA-Z0-9]+]] - // CHECK: [[CONDFALSE]]: - // CHECK: [[CONDEND]]: - // CHECK-NEXT: phi i8 addrspace(4)* [ %[[VALTRUE]], %[[CONDTRUE]] ], [ getelementptr inbounds ([21 x i8], [21 x i8] addrspace(4)* addrspacecast ([21 x i8] addrspace(1)* @{{.*}} to [21 x i8] addrspace(4)*), i64 0, i64 0), %[[CONDFALSE]] ] const char *select_null = i > 2 ? "Yet another Hello world" : nullptr; (void)select_null; - // CHECK: select i1 %{{.*}}, i8 addrspace(4)* getelementptr inbounds ([24 x i8], [24 x i8] addrspace(4)* addrspacecast ([24 x i8] addrspace(1)* @{{.*}} to [24 x i8] addrspace(4)*), i64 0, i64 0) const char *select_str_trivial1 = true ? str : "Another hello world!"; (void)select_str_trivial1; - // CHECK: %[[TRIVIALTRUE:[a-zA-Z0-9]+]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %[[STRVAL]] - // CHECK: store i8 addrspace(4)* %[[TRIVIALTRUE]], i8 addrspace(4)* addrspace(4)* %{{.*}}, align 8 const char *select_str_trivial2 = false ? str : "Another hello world!"; (void)select_str_trivial2; diff --git a/clang/test/OpenMP/amdgcn_target_init_temp_alloca.cpp b/clang/test/OpenMP/amdgcn_target_init_temp_alloca.cpp --- a/clang/test/OpenMP/amdgcn_target_init_temp_alloca.cpp +++ b/clang/test/OpenMP/amdgcn_target_init_temp_alloca.cpp @@ -12,6 +12,8 @@ int arr[N]; // CHECK: [[VAR_ADDR:%.+]] = alloca [100 x i32]*, align 8, addrspace(5) + // CHECK-NEXT: [[VAR_I:%.+]] = alloca i32, align 4, addrspace(5) + // CHECK-NEXT: [[VAR_I_CAST:%.+]] = addrspacecast i32 addrspace(5)* [[VAR_I]] to i32* // CHECK-NEXT: [[VAR_ADDR_CAST:%.+]] = addrspacecast [100 x i32]* addrspace(5)* [[VAR_ADDR]] to [100 x i32]** // CHECK: store [100 x i32]* [[VAR:%.+]], [100 x i32]** [[VAR_ADDR_CAST]], align 8 diff --git a/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp @@ -301,7 +301,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -337,6 +336,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -637,7 +637,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -673,6 +672,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -784,7 +784,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -829,6 +828,7 @@ // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_IF_END]] @@ -1129,7 +1129,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -1165,6 +1164,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1276,7 +1276,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -1321,6 +1320,7 @@ // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_IF_END]] @@ -1598,7 +1598,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -1634,6 +1633,7 @@ // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1934,7 +1934,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -1970,6 +1969,7 @@ // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2081,7 +2081,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -2126,6 +2125,7 @@ // CHECK2: omp_if.else: // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK2-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK2-NEXT: br label [[OMP_IF_END]] @@ -2426,7 +2426,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -2462,6 +2461,7 @@ // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2573,7 +2573,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -2618,6 +2617,7 @@ // CHECK2: omp_if.else: // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK2-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK2-NEXT: br label [[OMP_IF_END]] @@ -2895,7 +2895,6 @@ // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -2931,6 +2930,7 @@ // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -3231,7 +3231,6 @@ // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -3267,6 +3266,7 @@ // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -3378,7 +3378,6 @@ // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -3423,6 +3422,7 @@ // CHECK5: omp_if.else: // CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK5-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK5-NEXT: br label [[OMP_IF_END]] @@ -3723,7 +3723,6 @@ // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -3759,6 +3758,7 @@ // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -3870,7 +3870,6 @@ // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -3915,6 +3914,7 @@ // CHECK5: omp_if.else: // CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK5-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK5-NEXT: br label [[OMP_IF_END]] @@ -4192,7 +4192,6 @@ // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -4228,6 +4227,7 @@ // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -4528,7 +4528,6 @@ // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -4564,6 +4563,7 @@ // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -4675,7 +4675,6 @@ // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -4720,6 +4719,7 @@ // CHECK6: omp_if.else: // CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK6-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK6-NEXT: br label [[OMP_IF_END]] @@ -5020,7 +5020,6 @@ // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -5056,6 +5055,7 @@ // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -5167,7 +5167,6 @@ // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -5212,6 +5211,7 @@ // CHECK6: omp_if.else: // CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK6-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK6-NEXT: br label [[OMP_IF_END]] @@ -5489,7 +5489,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -5525,6 +5524,7 @@ // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -5825,7 +5825,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -5861,6 +5860,7 @@ // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -5972,7 +5972,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -6017,6 +6016,7 @@ // CHECK9: omp_if.else: // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK9-NEXT: br label [[OMP_IF_END]] @@ -6317,7 +6317,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -6353,6 +6352,7 @@ // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -6464,7 +6464,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -6509,6 +6508,7 @@ // CHECK9: omp_if.else: // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK9-NEXT: br label [[OMP_IF_END]] @@ -6786,7 +6786,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -6822,6 +6821,7 @@ // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -7122,7 +7122,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -7158,6 +7157,7 @@ // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -7269,7 +7269,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -7314,6 +7313,7 @@ // CHECK10: omp_if.else: // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK10-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK10-NEXT: br label [[OMP_IF_END]] @@ -7614,7 +7614,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -7650,6 +7649,7 @@ // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -7761,7 +7761,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -7806,6 +7805,7 @@ // CHECK10: omp_if.else: // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK10-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK10-NEXT: br label [[OMP_IF_END]] @@ -8083,7 +8083,6 @@ // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -8119,6 +8118,7 @@ // CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -8419,7 +8419,6 @@ // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -8455,6 +8454,7 @@ // CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -8566,7 +8566,6 @@ // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -8611,6 +8610,7 @@ // CHECK13: omp_if.else: // CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK13-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK13-NEXT: br label [[OMP_IF_END]] @@ -8911,7 +8911,6 @@ // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -8947,6 +8946,7 @@ // CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -9058,7 +9058,6 @@ // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -9103,6 +9102,7 @@ // CHECK13: omp_if.else: // CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK13-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK13-NEXT: br label [[OMP_IF_END]] @@ -9380,7 +9380,6 @@ // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -9416,6 +9415,7 @@ // CHECK14-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -9716,7 +9716,6 @@ // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -9752,6 +9751,7 @@ // CHECK14-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -9863,7 +9863,6 @@ // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -9908,6 +9907,7 @@ // CHECK14: omp_if.else: // CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK14-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK14-NEXT: br label [[OMP_IF_END]] @@ -10208,7 +10208,6 @@ // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -10244,6 +10243,7 @@ // CHECK14-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -10355,7 +10355,6 @@ // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -10400,6 +10399,7 @@ // CHECK14: omp_if.else: // CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK14-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK14-NEXT: br label [[OMP_IF_END]] @@ -10495,4 +10495,3 @@ // CHECK14-NEXT: call void @__tgt_register_requires(i64 1) // CHECK14-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp @@ -184,23 +184,23 @@ // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !11 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -261,23 +261,23 @@ // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -312,7 +312,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -337,26 +336,27 @@ // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !20 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !20 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !20 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !20 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -417,24 +417,24 @@ // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z9gtid_testv() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !23 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -547,23 +547,23 @@ // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !26 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -624,24 +624,24 @@ // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn4v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: call void @_Z3fn4v(), !llvm.access.group !29 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -676,7 +676,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -701,26 +700,27 @@ // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !32 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !32 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !32 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !32 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -781,24 +781,24 @@ // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn5v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: call void @_Z3fn5v(), !llvm.access.group !35 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -837,7 +837,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -864,35 +863,36 @@ // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !38 // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !38 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !38 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group !38 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !38 // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -953,24 +953,24 @@ // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !41 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn6v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: call void @_Z3fn6v(), !llvm.access.group !41 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1081,23 +1081,23 @@ // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !44 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !44 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !44 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1158,24 +1158,24 @@ // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn1v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !47 +// CHECK1-NEXT: call void @_Z3fn1v(), !llvm.access.group !47 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1210,7 +1210,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -1235,26 +1234,27 @@ // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !50 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !50 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !50 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !50 +// CHECK1-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !50 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !50 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !50 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1315,24 +1315,24 @@ // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !53 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn2v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !53 +// CHECK1-NEXT: call void @_Z3fn2v(), !llvm.access.group !53 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1371,7 +1371,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -1398,35 +1397,36 @@ // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !56 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !56 // CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !56 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4, !llvm.access.group !56 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !56 // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !56 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !56 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !56 +// CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group !56 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !56 // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !56 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1487,24 +1487,24 @@ // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !59 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn3v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !59 +// CHECK1-NEXT: call void @_Z3fn3v(), !llvm.access.group !59 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1593,23 +1593,23 @@ // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !11 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1670,23 +1670,23 @@ // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1721,7 +1721,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -1746,26 +1745,27 @@ // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !20 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !20 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !20 +// CHECK2-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !20 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !20 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1826,24 +1826,24 @@ // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z9gtid_testv() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 +// CHECK2-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !23 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1956,23 +1956,23 @@ // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !26 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2033,24 +2033,24 @@ // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn4v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !29 +// CHECK2-NEXT: call void @_Z3fn4v(), !llvm.access.group !29 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2085,7 +2085,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -2110,26 +2109,27 @@ // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !32 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !32 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !32 +// CHECK2-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !32 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !32 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2190,24 +2190,24 @@ // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn5v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !35 +// CHECK2-NEXT: call void @_Z3fn5v(), !llvm.access.group !35 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2246,7 +2246,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -2273,35 +2272,36 @@ // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !38 // CHECK2-NEXT: br label [[OMP_IF_END:%.*]] // CHECK2: omp_if.else: -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !38 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !38 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !38 +// CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group !38 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !38 // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2362,24 +2362,24 @@ // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !41 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn6v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !41 +// CHECK2-NEXT: call void @_Z3fn6v(), !llvm.access.group !41 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2490,23 +2490,23 @@ // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !44 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !44 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !44 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2567,24 +2567,24 @@ // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn1v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !47 +// CHECK2-NEXT: call void @_Z3fn1v(), !llvm.access.group !47 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2619,7 +2619,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -2644,26 +2643,27 @@ // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !50 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !50 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !50 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !50 +// CHECK2-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !50 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !50 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !50 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2724,24 +2724,24 @@ // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !53 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn2v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !53 +// CHECK2-NEXT: call void @_Z3fn2v(), !llvm.access.group !53 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2780,7 +2780,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -2807,35 +2806,36 @@ // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !56 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !56 // CHECK2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !56 // CHECK2-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4, !llvm.access.group !56 // CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !56 // CHECK2-NEXT: br label [[OMP_IF_END:%.*]] // CHECK2: omp_if.else: -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !56 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !56 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !56 +// CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group !56 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !56 // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !56 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2896,24 +2896,24 @@ // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !59 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn3v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !59 +// CHECK2-NEXT: call void @_Z3fn3v(), !llvm.access.group !59 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -3002,23 +3002,23 @@ // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3079,23 +3079,23 @@ // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3130,7 +3130,6 @@ // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -3155,26 +3154,27 @@ // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !20 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !20 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !20 +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !20 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3235,24 +3235,24 @@ // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z9gtid_testv() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !23 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3365,23 +3365,23 @@ // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !26 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3442,24 +3442,24 @@ // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn4v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: call void @_Z3fn4v(), !llvm.access.group !29 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3494,7 +3494,6 @@ // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -3530,6 +3529,7 @@ // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -3538,7 +3538,7 @@ // CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3616,7 +3616,7 @@ // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3659,8 +3659,6 @@ // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED12:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR18:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR18]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -3695,41 +3693,42 @@ // CHECK3: omp_if.then: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 // CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK3-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK3-NEXT: [[TMP14:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !35 // CHECK3-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK3-NEXT: [[FROMBOOL4:%.*]] = zext i1 [[TOBOOL3]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL4]], i8* [[CONV]], align 1 -// CHECK3-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: store i8 [[FROMBOOL4]], i8* [[CONV]], align 1, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !35 // CHECK3-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP16]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL5]], label [[OMP_IF_THEN6:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then6: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]), !llvm.access.group !35 // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: call void @.omp_outlined..9(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]), !llvm.access.group !35 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !35 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: call void @.omp_outlined..9(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) #[[ATTR2]], !llvm.access.group !35 +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]), !llvm.access.group !35 // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END23:%.*]] // CHECK3: omp_if.else7: @@ -3759,6 +3758,7 @@ // CHECK3: omp_if.else17: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) // CHECK3-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR18]], align 4 // CHECK3-NEXT: call void @.omp_outlined..10(i32* [[TMP29]], i32* [[DOTBOUND_ZERO_ADDR18]], i64 [[TMP23]], i64 [[TMP25]], i64 [[TMP27]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) // CHECK3-NEXT: br label [[OMP_IF_END19]] @@ -3769,7 +3769,7 @@ // CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK3: omp.inner.for.end22: // CHECK3-NEXT: br label [[OMP_IF_END23]] // CHECK3: omp_if.end23: @@ -3839,24 +3839,24 @@ // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn6v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: call void @_Z3fn6v(), !llvm.access.group !39 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: @@ -3895,7 +3895,7 @@ // CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK3-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK3: omp.inner.for.end18: // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: @@ -3967,24 +3967,24 @@ // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 // CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn6v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !43 +// CHECK3-NEXT: call void @_Z3fn6v(), !llvm.access.group !43 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: @@ -4023,7 +4023,7 @@ // CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK3-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK3: omp.inner.for.end18: // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: @@ -4138,23 +4138,23 @@ // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !47 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !47 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !47 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -4215,24 +4215,24 @@ // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !50 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn1v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: call void @_Z3fn1v(), !llvm.access.group !50 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -4267,7 +4267,6 @@ // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -4303,6 +4302,7 @@ // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: call void @.omp_outlined..14(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -4311,7 +4311,7 @@ // CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -4389,7 +4389,7 @@ // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -4428,7 +4428,6 @@ // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -4455,35 +4454,36 @@ // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !55 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !55 // CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !55 // CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4, !llvm.access.group !55 // CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !55 // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: call void @.omp_outlined..16(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !55 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !55 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !55 +// CHECK3-NEXT: call void @.omp_outlined..16(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group !55 +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !55 // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !55 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -4544,24 +4544,24 @@ // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !58 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn3v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !58 +// CHECK3-NEXT: call void @_Z3fn3v(), !llvm.access.group !58 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -4650,23 +4650,23 @@ // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !11 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -4727,23 +4727,23 @@ // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -4778,7 +4778,6 @@ // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -4803,26 +4802,27 @@ // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !20 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !20 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !20 +// CHECK4-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !20 +// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !20 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -4883,24 +4883,24 @@ // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z9gtid_testv() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 +// CHECK4-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !23 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -5013,23 +5013,23 @@ // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !26 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -5090,24 +5090,24 @@ // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn4v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !29 +// CHECK4-NEXT: call void @_Z3fn4v(), !llvm.access.group !29 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -5142,7 +5142,6 @@ // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -5178,6 +5177,7 @@ // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -5186,7 +5186,7 @@ // CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -5264,7 +5264,7 @@ // CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -5307,8 +5307,6 @@ // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTCAPTURE_EXPR__CASTED12:%.*]] = alloca i64, align 8 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR18:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR18]], align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -5343,41 +5341,42 @@ // CHECK4: omp_if.then: // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 // CHECK4-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK4-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK4-NEXT: [[TMP14:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK4-NEXT: [[TMP14:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !35 // CHECK4-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK4-NEXT: [[FROMBOOL4:%.*]] = zext i1 [[TOBOOL3]] to i8 -// CHECK4-NEXT: store i8 [[FROMBOOL4]], i8* [[CONV]], align 1 -// CHECK4-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK4-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK4-NEXT: store i8 [[FROMBOOL4]], i8* [[CONV]], align 1, !llvm.access.group !35 +// CHECK4-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !35 +// CHECK4-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !35 // CHECK4-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP16]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL5]], label [[OMP_IF_THEN6:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then6: -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]), !llvm.access.group !35 // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] // CHECK4: omp_if.else: -// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) -// CHECK4-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: call void @.omp_outlined..9(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) #[[ATTR2]] -// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) +// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]), !llvm.access.group !35 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !35 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !35 +// CHECK4-NEXT: call void @.omp_outlined..9(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) #[[ATTR2]], !llvm.access.group !35 +// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]), !llvm.access.group !35 // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_IF_END23:%.*]] // CHECK4: omp_if.else7: @@ -5407,6 +5406,7 @@ // CHECK4: omp_if.else17: // CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) // CHECK4-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR18]], align 4 // CHECK4-NEXT: call void @.omp_outlined..10(i32* [[TMP29]], i32* [[DOTBOUND_ZERO_ADDR18]], i64 [[TMP23]], i64 [[TMP25]], i64 [[TMP27]]) #[[ATTR2]] // CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) // CHECK4-NEXT: br label [[OMP_IF_END19]] @@ -5417,7 +5417,7 @@ // CHECK4-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK4-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK4: omp.inner.for.end22: // CHECK4-NEXT: br label [[OMP_IF_END23]] // CHECK4: omp_if.end23: @@ -5487,24 +5487,24 @@ // CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn6v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 +// CHECK4-NEXT: call void @_Z3fn6v(), !llvm.access.group !39 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] // CHECK4: omp_if.else: @@ -5543,7 +5543,7 @@ // CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK4-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK4: omp.inner.for.end18: // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: @@ -5615,24 +5615,24 @@ // CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 // CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn6v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !43 +// CHECK4-NEXT: call void @_Z3fn6v(), !llvm.access.group !43 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] // CHECK4: omp_if.else: @@ -5671,7 +5671,7 @@ // CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK4-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK4: omp.inner.for.end18: // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: @@ -5786,23 +5786,23 @@ // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !47 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !47 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !47 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -5863,24 +5863,24 @@ // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !50 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn1v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !50 +// CHECK4-NEXT: call void @_Z3fn1v(), !llvm.access.group !50 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -5915,7 +5915,6 @@ // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -5951,6 +5950,7 @@ // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: call void @.omp_outlined..14(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -5959,7 +5959,7 @@ // CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -6037,7 +6037,7 @@ // CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -6076,7 +6076,6 @@ // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -6103,35 +6102,36 @@ // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !55 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !55 // CHECK4-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !55 // CHECK4-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4, !llvm.access.group !55 // CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then: -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !55 // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] // CHECK4: omp_if.else: -// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) -// CHECK4-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: call void @.omp_outlined..16(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] -// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !55 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !55 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !55 +// CHECK4-NEXT: call void @.omp_outlined..16(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group !55 +// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !55 // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !55 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -6192,24 +6192,24 @@ // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !58 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn3v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !58 +// CHECK4-NEXT: call void @_Z3fn3v(), !llvm.access.group !58 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -7450,23 +7450,23 @@ // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !15 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -7527,23 +7527,23 @@ // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -7578,7 +7578,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -7603,26 +7602,27 @@ // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !24 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !24 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -7683,24 +7683,24 @@ // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z9gtid_testv() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !27 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -7813,23 +7813,23 @@ // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !30 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -7890,24 +7890,24 @@ // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn4v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: call void @_Z3fn4v(), !llvm.access.group !33 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -7942,7 +7942,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -7967,26 +7966,27 @@ // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !36 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !36 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8047,24 +8047,24 @@ // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn5v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 +// CHECK9-NEXT: call void @_Z3fn5v(), !llvm.access.group !39 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8103,7 +8103,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -8130,35 +8129,36 @@ // CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !42 // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !42 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !42 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group !42 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !42 // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8219,24 +8219,24 @@ // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn6v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: call void @_Z3fn6v(), !llvm.access.group !45 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8347,23 +8347,23 @@ // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !48 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !48 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !48 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8424,24 +8424,24 @@ // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !51 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn1v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !51 +// CHECK9-NEXT: call void @_Z3fn1v(), !llvm.access.group !51 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8476,7 +8476,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -8501,26 +8500,27 @@ // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !54 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !54 +// CHECK9-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !54 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !54 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8581,24 +8581,24 @@ // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !57 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn2v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !57 +// CHECK9-NEXT: call void @_Z3fn2v(), !llvm.access.group !57 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8637,7 +8637,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -8664,35 +8663,36 @@ // CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !60 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !60 // CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !60 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4, !llvm.access.group !60 // CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !60 // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !60 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !60 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !60 +// CHECK9-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group !60 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !60 // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !60 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP61:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8753,24 +8753,24 @@ // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !63 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn3v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !63 +// CHECK9-NEXT: call void @_Z3fn3v(), !llvm.access.group !63 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP64:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8859,23 +8859,23 @@ // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !15 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -8936,23 +8936,23 @@ // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -8987,7 +8987,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -9012,26 +9011,27 @@ // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !24 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !24 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9092,24 +9092,24 @@ // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z9gtid_testv() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 +// CHECK10-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !27 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9222,23 +9222,23 @@ // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !30 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9299,24 +9299,24 @@ // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z3fn4v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 +// CHECK10-NEXT: call void @_Z3fn4v(), !llvm.access.group !33 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9351,7 +9351,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -9376,26 +9375,27 @@ // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !36 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !36 +// CHECK10-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !36 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9456,24 +9456,24 @@ // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z3fn5v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 +// CHECK10-NEXT: call void @_Z3fn5v(), !llvm.access.group !39 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9512,7 +9512,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -9539,35 +9538,36 @@ // CHECK10-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !42 // CHECK10-NEXT: br label [[OMP_IF_END:%.*]] // CHECK10: omp_if.else: -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) -// CHECK10-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !42 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !42 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group !42 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !42 // CHECK10-NEXT: br label [[OMP_IF_END]] // CHECK10: omp_if.end: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9628,24 +9628,24 @@ // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z3fn6v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 +// CHECK10-NEXT: call void @_Z3fn6v(), !llvm.access.group !45 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9756,23 +9756,23 @@ // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !48 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !48 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !48 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9833,24 +9833,24 @@ // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !51 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z3fn1v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !51 +// CHECK10-NEXT: call void @_Z3fn1v(), !llvm.access.group !51 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9885,7 +9885,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -9910,26 +9909,27 @@ // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !54 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !54 +// CHECK10-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !54 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !54 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9990,24 +9990,24 @@ // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !57 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z3fn2v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !57 +// CHECK10-NEXT: call void @_Z3fn2v(), !llvm.access.group !57 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -10046,7 +10046,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -10073,35 +10072,36 @@ // CHECK10-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !60 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !60 // CHECK10-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !60 // CHECK10-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4, !llvm.access.group !60 // CHECK10-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !60 // CHECK10-NEXT: br label [[OMP_IF_END:%.*]] // CHECK10: omp_if.else: -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) -// CHECK10-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !60 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !60 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !60 +// CHECK10-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group !60 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !60 // CHECK10-NEXT: br label [[OMP_IF_END]] // CHECK10: omp_if.end: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !60 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP61:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -10162,24 +10162,24 @@ // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !63 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z3fn3v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !63 +// CHECK10-NEXT: call void @_Z3fn3v(), !llvm.access.group !63 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP64:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -10268,23 +10268,23 @@ // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !15 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -10345,23 +10345,23 @@ // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -10396,7 +10396,6 @@ // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -10421,26 +10420,27 @@ // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !24 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !24 +// CHECK11-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !24 +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -10501,24 +10501,24 @@ // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z9gtid_testv() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 +// CHECK11-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !27 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -10631,23 +10631,23 @@ // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !30 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -10708,24 +10708,24 @@ // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn4v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: call void @_Z3fn4v(), !llvm.access.group !33 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -10760,7 +10760,6 @@ // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -10796,6 +10795,7 @@ // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -10804,7 +10804,7 @@ // CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -10882,7 +10882,7 @@ // CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -10925,8 +10925,6 @@ // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED12:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR18:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR18]], align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -10961,41 +10959,42 @@ // CHECK11: omp_if.then: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !39 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !39 // CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !39 // CHECK11-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK11-NEXT: [[TMP14:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !39 // CHECK11-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK11-NEXT: [[FROMBOOL4:%.*]] = zext i1 [[TOBOOL3]] to i8 -// CHECK11-NEXT: store i8 [[FROMBOOL4]], i8* [[CONV]], align 1 -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK11-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: store i8 [[FROMBOOL4]], i8* [[CONV]], align 1, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !39 // CHECK11-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP16]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL5]], label [[OMP_IF_THEN6:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then6: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]), !llvm.access.group !39 // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: call void @.omp_outlined..9(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]), !llvm.access.group !39 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !39 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: call void @.omp_outlined..9(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) #[[ATTR2]], !llvm.access.group !39 +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]), !llvm.access.group !39 // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !39 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END23:%.*]] // CHECK11: omp_if.else7: @@ -11025,6 +11024,7 @@ // CHECK11: omp_if.else17: // CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) // CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR18]], align 4 // CHECK11-NEXT: call void @.omp_outlined..10(i32* [[TMP29]], i32* [[DOTBOUND_ZERO_ADDR18]], i64 [[TMP23]], i64 [[TMP25]], i64 [[TMP27]]) #[[ATTR2]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) // CHECK11-NEXT: br label [[OMP_IF_END19]] @@ -11035,7 +11035,7 @@ // CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK11: omp.inner.for.end22: // CHECK11-NEXT: br label [[OMP_IF_END23]] // CHECK11: omp_if.end23: @@ -11105,24 +11105,24 @@ // CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 // CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn6v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: call void @_Z3fn6v(), !llvm.access.group !43 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: @@ -11161,7 +11161,7 @@ // CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK11-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK11: omp.inner.for.end18: // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: @@ -11233,24 +11233,24 @@ // CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 // CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn6v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !47 +// CHECK11-NEXT: call void @_Z3fn6v(), !llvm.access.group !47 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: @@ -11289,7 +11289,7 @@ // CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK11-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK11: omp.inner.for.end18: // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: @@ -11404,23 +11404,23 @@ // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !51 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !51 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !51 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !51 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !51 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -11481,24 +11481,24 @@ // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !54 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn1v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !54 +// CHECK11-NEXT: call void @_Z3fn1v(), !llvm.access.group !54 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -11533,7 +11533,6 @@ // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -11569,6 +11568,7 @@ // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: call void @.omp_outlined..14(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -11577,7 +11577,7 @@ // CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -11655,7 +11655,7 @@ // CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -11694,7 +11694,6 @@ // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -11721,35 +11720,36 @@ // CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !59 // CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 // CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4, !llvm.access.group !59 // CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !59 // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: call void @.omp_outlined..16(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !59 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !59 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !59 +// CHECK11-NEXT: call void @.omp_outlined..16(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group !59 +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !59 // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !59 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -11810,24 +11810,24 @@ // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !62 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn3v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !62 +// CHECK11-NEXT: call void @_Z3fn3v(), !llvm.access.group !62 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -11916,23 +11916,23 @@ // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !15 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -11993,23 +11993,23 @@ // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -12044,7 +12044,6 @@ // CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -12069,26 +12068,27 @@ // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK12-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !24 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !24 +// CHECK12-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !24 +// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -12149,24 +12149,24 @@ // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z9gtid_testv() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 +// CHECK12-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !27 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -12279,23 +12279,23 @@ // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !30 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -12356,24 +12356,24 @@ // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn4v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 +// CHECK12-NEXT: call void @_Z3fn4v(), !llvm.access.group !33 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -12408,7 +12408,6 @@ // CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -12444,6 +12443,7 @@ // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -12452,7 +12452,7 @@ // CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -12530,7 +12530,7 @@ // CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -12573,8 +12573,6 @@ // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTCAPTURE_EXPR__CASTED12:%.*]] = alloca i64, align 8 // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR18:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR18]], align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK12-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -12609,41 +12607,42 @@ // CHECK12: omp_if.then: // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !39 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !39 // CHECK12-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !39 // CHECK12-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK12-NEXT: [[TMP14:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK12-NEXT: [[TMP14:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !39 // CHECK12-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK12-NEXT: [[FROMBOOL4:%.*]] = zext i1 [[TOBOOL3]] to i8 -// CHECK12-NEXT: store i8 [[FROMBOOL4]], i8* [[CONV]], align 1 -// CHECK12-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK12-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK12-NEXT: store i8 [[FROMBOOL4]], i8* [[CONV]], align 1, !llvm.access.group !39 +// CHECK12-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !39 +// CHECK12-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !39 // CHECK12-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP16]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL5]], label [[OMP_IF_THEN6:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK12: omp_if.then6: -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]), !llvm.access.group !39 // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] // CHECK12: omp_if.else: -// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) -// CHECK12-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK12-NEXT: call void @.omp_outlined..9(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) #[[ATTR2]] -// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) +// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]), !llvm.access.group !39 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !39 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !39 +// CHECK12-NEXT: call void @.omp_outlined..9(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) #[[ATTR2]], !llvm.access.group !39 +// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]), !llvm.access.group !39 // CHECK12-NEXT: br label [[OMP_IF_END]] // CHECK12: omp_if.end: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !39 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_IF_END23:%.*]] // CHECK12: omp_if.else7: @@ -12673,6 +12672,7 @@ // CHECK12: omp_if.else17: // CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) // CHECK12-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR18]], align 4 // CHECK12-NEXT: call void @.omp_outlined..10(i32* [[TMP29]], i32* [[DOTBOUND_ZERO_ADDR18]], i64 [[TMP23]], i64 [[TMP25]], i64 [[TMP27]]) #[[ATTR2]] // CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) // CHECK12-NEXT: br label [[OMP_IF_END19]] @@ -12683,7 +12683,7 @@ // CHECK12-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK12-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK12: omp.inner.for.end22: // CHECK12-NEXT: br label [[OMP_IF_END23]] // CHECK12: omp_if.end23: @@ -12753,24 +12753,24 @@ // CHECK12-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 // CHECK12-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK12-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn6v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: call void @_Z3fn6v(), !llvm.access.group !43 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] // CHECK12: omp_if.else: @@ -12809,7 +12809,7 @@ // CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK12-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK12: omp.inner.for.end18: // CHECK12-NEXT: br label [[OMP_IF_END]] // CHECK12: omp_if.end: @@ -12881,24 +12881,24 @@ // CHECK12-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 // CHECK12-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK12-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn6v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !47 +// CHECK12-NEXT: call void @_Z3fn6v(), !llvm.access.group !47 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] // CHECK12: omp_if.else: @@ -12937,7 +12937,7 @@ // CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK12-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK12: omp.inner.for.end18: // CHECK12-NEXT: br label [[OMP_IF_END]] // CHECK12: omp_if.end: @@ -13052,23 +13052,23 @@ // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !51 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !51 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !51 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !51 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !51 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -13129,24 +13129,24 @@ // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !54 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn1v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !54 +// CHECK12-NEXT: call void @_Z3fn1v(), !llvm.access.group !54 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -13181,7 +13181,6 @@ // CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -13217,6 +13216,7 @@ // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: call void @.omp_outlined..14(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -13225,7 +13225,7 @@ // CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -13303,7 +13303,7 @@ // CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -13342,7 +13342,6 @@ // CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK12-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -13369,35 +13368,36 @@ // CHECK12-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !59 // CHECK12-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 // CHECK12-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4, !llvm.access.group !59 // CHECK12-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK12-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK12: omp_if.then: -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]) +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !59 // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] // CHECK12: omp_if.else: -// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) -// CHECK12-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK12-NEXT: call void @.omp_outlined..16(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] -// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !59 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !59 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !59 +// CHECK12-NEXT: call void @.omp_outlined..16(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group !59 +// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !59 // CHECK12-NEXT: br label [[OMP_IF_END]] // CHECK12: omp_if.end: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !59 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -13458,24 +13458,24 @@ // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !62 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn3v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !62 +// CHECK12-NEXT: call void @_Z3fn3v(), !llvm.access.group !62 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: diff --git a/clang/test/OpenMP/nvptx_allocate_codegen.cpp b/clang/test/OpenMP/nvptx_allocate_codegen.cpp --- a/clang/test/OpenMP/nvptx_allocate_codegen.cpp +++ b/clang/test/OpenMP/nvptx_allocate_codegen.cpp @@ -91,7 +91,7 @@ // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: store i32 2, i32* @_ZZ4mainE1a, align 4 // CHECK1-NEXT: store double 3.000000e+00, double* [[B]], align 8 -// CHECK1-NEXT: [[CALL:%.*]] = call i32 @_Z3fooIiET_v() #[[ATTR6:[0-9]+]] +// CHECK1-NEXT: [[CALL:%.*]] = call i32 @_Z3fooIiET_v() #[[ATTR7:[0-9]+]] // CHECK1-NEXT: ret i32 [[CALL]] // // @@ -127,7 +127,7 @@ // CHECK1-NEXT: [[TMP0:%.*]] = load float, float* [[BAR_A]], align 4 // CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP0]] to double // CHECK1-NEXT: store double [[CONV]], double* addrspacecast (double addrspace(3)* @bar_b to double*), align 8 -// CHECK1-NEXT: call void @_Z3bazRf(float* nonnull align 4 dereferenceable(4) [[BAR_A]]) #[[ATTR6]] +// CHECK1-NEXT: call void @_Z3bazRf(float* nonnull align 4 dereferenceable(4) [[BAR_A]]) #[[ATTR7]] // CHECK1-NEXT: ret void // // @@ -138,9 +138,9 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR5:[0-9]+]] // CHECK1-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_data_sharing.cpp b/clang/test/OpenMP/nvptx_data_sharing.cpp --- a/clang/test/OpenMP/nvptx_data_sharing.cpp +++ b/clang/test/OpenMP/nvptx_data_sharing.cpp @@ -447,15 +447,15 @@ // CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 // CHECK-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** // CHECK-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8 -// CHECK-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR1:[0-9]+]] +// CHECK-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR3:[0-9]+]] // CHECK-NEXT: ret void // // @@ -488,9 +488,9 @@ // CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 @@ -499,6 +499,6 @@ // CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 1 // CHECK-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32** // CHECK-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 -// CHECK-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]]) #[[ATTR1]] +// CHECK-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]]) #[[ATTR3]] // CHECK-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp --- a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp +++ b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp @@ -2984,7 +2984,6 @@ // CHECK4-NEXT: [[ARGC_CASTED:%.*]] = alloca i64, align 8 // CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 // CHECK4-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 8 // CHECK4-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 @@ -3004,8 +3003,9 @@ // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i64* [[ARGC_CASTED]] to i32* // CHECK4-NEXT: store i32 [[TMP6]], i32* [[CONV1]], align 4 // CHECK4-NEXT: [[TMP7:%.*]] = load i64, i64* [[ARGC_CASTED]], align 8 +// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK4-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]], [10 x i32]* [[TMP1]], i32* [[TMP2]], i64 [[TMP7]], [10 x i32]* [[TMP3]]) #[[ATTR1:[0-9]+]] +// CHECK4-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]], [10 x i32]* [[TMP1]], i32* [[TMP2]], i64 [[TMP7]], [10 x i32]* [[TMP3]]) #[[ATTR5:[0-9]+]] // CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) // CHECK4-NEXT: ret void // CHECK4: worker.exit: @@ -3259,23 +3259,23 @@ // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 -// CHECK4-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I6]]) #[[ATTR5:[0-9]+]] -// CHECK4-NEXT: [[CALL9:%.*]] = call i32 @_Z3fooPi(i32* [[TMP1]]) #[[ATTR5]] +// CHECK4-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I6]]) #[[ATTR7:[0-9]+]] +// CHECK4-NEXT: [[CALL9:%.*]] = call i32 @_Z3fooPi(i32* [[TMP1]]) #[[ATTR7]] // CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[CALL]], [[CALL9]] // CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4 // CHECK4-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B4]], i64 0, i64 [[IDXPROM]] -// CHECK4-NEXT: [[CALL11:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX]]) #[[ATTR5]] +// CHECK4-NEXT: [[CALL11:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX]]) #[[ATTR7]] // CHECK4-NEXT: [[ADD12:%.*]] = add nsw i32 [[ADD10]], [[CALL11]] // CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[I6]], align 4 // CHECK4-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP20]] to i64 // CHECK4-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C5]], i64 0, i64 [[IDXPROM13]] -// CHECK4-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX14]]) #[[ATTR5]] +// CHECK4-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX14]]) #[[ATTR7]] // CHECK4-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD12]], [[CALL15]] // CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[I6]], align 4 // CHECK4-NEXT: [[IDXPROM17:%.*]] = sext i32 [[TMP21]] to i64 // CHECK4-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i64 0, i64 [[IDXPROM17]] -// CHECK4-NEXT: [[CALL19:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX18]]) #[[ATTR5]] +// CHECK4-NEXT: [[CALL19:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX18]]) #[[ATTR7]] // CHECK4-NEXT: [[ADD20:%.*]] = add nsw i32 [[ADD16]], [[CALL19]] // CHECK4-NEXT: store i32 [[ADD20]], i32* [[TMP1]], align 4 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] @@ -3318,7 +3318,6 @@ // CHECK5-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK5-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK5-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4 // CHECK5-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 @@ -3336,8 +3335,9 @@ // CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK5-NEXT: store i32 [[TMP6]], i32* [[ARGC_CASTED]], align 4 // CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARGC_CASTED]], align 4 +// CHECK5-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK5-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]], [10 x i32]* [[TMP1]], i32* [[TMP2]], i32 [[TMP7]], [10 x i32]* [[TMP3]]) #[[ATTR1:[0-9]+]] +// CHECK5-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]], [10 x i32]* [[TMP1]], i32* [[TMP2]], i32 [[TMP7]], [10 x i32]* [[TMP3]]) #[[ATTR5:[0-9]+]] // CHECK5-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) // CHECK5-NEXT: ret void // CHECK5: worker.exit: @@ -3585,20 +3585,20 @@ // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK5-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I5]]) #[[ATTR5:[0-9]+]] -// CHECK5-NEXT: [[CALL7:%.*]] = call i32 @_Z3fooPi(i32* [[TMP1]]) #[[ATTR5]] +// CHECK5-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I5]]) #[[ATTR7:[0-9]+]] +// CHECK5-NEXT: [[CALL7:%.*]] = call i32 @_Z3fooPi(i32* [[TMP1]]) #[[ATTR7]] // CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]] // CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B3]], i32 0, i32 [[TMP19]] -// CHECK5-NEXT: [[CALL9:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX]]) #[[ATTR5]] +// CHECK5-NEXT: [[CALL9:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX]]) #[[ATTR7]] // CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], [[CALL9]] // CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[I5]], align 4 // CHECK5-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C4]], i32 0, i32 [[TMP20]] -// CHECK5-NEXT: [[CALL12:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX11]]) #[[ATTR5]] +// CHECK5-NEXT: [[CALL12:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX11]]) #[[ATTR7]] // CHECK5-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD10]], [[CALL12]] // CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[I5]], align 4 // CHECK5-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i32 0, i32 [[TMP21]] -// CHECK5-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX14]]) #[[ATTR5]] +// CHECK5-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX14]]) #[[ATTR7]] // CHECK5-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD13]], [[CALL15]] // CHECK5-NEXT: store i32 [[ADD16]], i32* [[TMP1]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] @@ -3641,7 +3641,6 @@ // CHECK6-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK6-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK6-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4 // CHECK6-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 @@ -3659,8 +3658,9 @@ // CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK6-NEXT: store i32 [[TMP6]], i32* [[ARGC_CASTED]], align 4 // CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARGC_CASTED]], align 4 +// CHECK6-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK6-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]], [10 x i32]* [[TMP1]], i32* [[TMP2]], i32 [[TMP7]], [10 x i32]* [[TMP3]]) #[[ATTR1:[0-9]+]] +// CHECK6-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]], [10 x i32]* [[TMP1]], i32* [[TMP2]], i32 [[TMP7]], [10 x i32]* [[TMP3]]) #[[ATTR5:[0-9]+]] // CHECK6-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) // CHECK6-NEXT: ret void // CHECK6: worker.exit: @@ -3908,20 +3908,20 @@ // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK6-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK6-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I5]]) #[[ATTR5:[0-9]+]] -// CHECK6-NEXT: [[CALL7:%.*]] = call i32 @_Z3fooPi(i32* [[TMP1]]) #[[ATTR5]] +// CHECK6-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I5]]) #[[ATTR7:[0-9]+]] +// CHECK6-NEXT: [[CALL7:%.*]] = call i32 @_Z3fooPi(i32* [[TMP1]]) #[[ATTR7]] // CHECK6-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]] // CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B3]], i32 0, i32 [[TMP19]] -// CHECK6-NEXT: [[CALL9:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX]]) #[[ATTR5]] +// CHECK6-NEXT: [[CALL9:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX]]) #[[ATTR7]] // CHECK6-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], [[CALL9]] // CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[I5]], align 4 // CHECK6-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C4]], i32 0, i32 [[TMP20]] -// CHECK6-NEXT: [[CALL12:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX11]]) #[[ATTR5]] +// CHECK6-NEXT: [[CALL12:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX11]]) #[[ATTR7]] // CHECK6-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD10]], [[CALL12]] // CHECK6-NEXT: [[TMP21:%.*]] = load i32, i32* [[I5]], align 4 // CHECK6-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i32 0, i32 [[TMP21]] -// CHECK6-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX14]]) #[[ATTR5]] +// CHECK6-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX14]]) #[[ATTR7]] // CHECK6-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD13]], [[CALL15]] // CHECK6-NEXT: store i32 [[ADD16]], i32* [[TMP1]], align 4 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] diff --git a/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp --- a/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp @@ -49,7 +49,7 @@ // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @_Z3usev() #[[ATTR6:[0-9]+]] +// CHECK1-NEXT: call void @_Z3usev() #[[ATTR7:[0-9]+]] // CHECK1-NEXT: ret void // // @@ -70,7 +70,7 @@ // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: call void @_Z3usev() #[[ATTR6]] +// CHECK1-NEXT: call void @_Z3usev() #[[ATTR7]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -84,20 +84,20 @@ // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @_Z4workv() #[[ATTR6]] +// CHECK1-NEXT: call void @_Z4workv() #[[ATTR7]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR6:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: ret void @@ -127,7 +127,7 @@ // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: call void @_Z3usev() #[[ATTR6:[0-9]+]] +// CHECK2-NEXT: call void @_Z3usev() #[[ATTR7:[0-9]+]] // CHECK2-NEXT: ret void // // @@ -148,7 +148,7 @@ // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: call void @_Z3usev() #[[ATTR6]] +// CHECK2-NEXT: call void @_Z3usev() #[[ATTR7]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -162,20 +162,20 @@ // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: call void @_Z4workv() #[[ATTR6]] +// CHECK2-NEXT: call void @_Z4workv() #[[ATTR7]] // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -// CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR6:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3:[0-9]+]] // CHECK2-NEXT: ret void @@ -205,7 +205,7 @@ // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: call void @_Z3usev() #[[ATTR6:[0-9]+]] +// CHECK3-NEXT: call void @_Z3usev() #[[ATTR7:[0-9]+]] // CHECK3-NEXT: ret void // // @@ -226,7 +226,7 @@ // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: -// CHECK3-NEXT: call void @_Z3usev() #[[ATTR6]] +// CHECK3-NEXT: call void @_Z3usev() #[[ATTR7]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -240,20 +240,20 @@ // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: call void @_Z4workv() #[[ATTR6]] +// CHECK3-NEXT: call void @_Z4workv() #[[ATTR7]] // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -// CHECK3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR6:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3:[0-9]+]] // CHECK3-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp --- a/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp @@ -45,7 +45,7 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK1-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR5:[0-9]+]] +// CHECK1-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR6:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 2) // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0]] to i8* @@ -84,7 +84,7 @@ // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR5]] +// CHECK1-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR6]] // CHECK1-NEXT: ret void // // @@ -95,9 +95,9 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 @@ -118,7 +118,7 @@ // CHECK1-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32**, i32*** [[C_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP0]], align 8 -// CHECK1-NEXT: call void @_Z4workPi(i32* [[TMP1]]) #[[ATTR5]] +// CHECK1-NEXT: call void @_Z4workPi(i32* [[TMP1]]) #[[ATTR6]] // CHECK1-NEXT: ret void // // @@ -132,7 +132,7 @@ // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to i8* // CHECK1-NEXT: [[TMP2:%.*]] = bitcast i32* [[ATOMIC_TEMP]] to i8* -// CHECK1-NEXT: call void @__atomic_load(i64 4, i8* [[TMP1]], i8* [[TMP2]], i32 0) #[[ATTR5]] +// CHECK1-NEXT: call void @__atomic_load(i64 4, i8* [[TMP1]], i8* [[TMP2]], i32 0) #[[ATTR6]] // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[ATOMIC_TEMP]], align 4 @@ -141,7 +141,7 @@ // CHECK1-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0]] to i8* // CHECK1-NEXT: [[TMP5:%.*]] = bitcast i32* [[ATOMIC_TEMP]] to i8* // CHECK1-NEXT: [[TMP6:%.*]] = bitcast i32* [[ATOMIC_TEMP1]] to i8* -// CHECK1-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, i8* [[TMP4]], i8* [[TMP5]], i8* [[TMP6]], i32 0, i32 0) #[[ATTR5]] +// CHECK1-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, i8* [[TMP4]], i8* [[TMP5]], i8* [[TMP6]], i32 0, i32 0) #[[ATTR6]] // CHECK1-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: // CHECK1-NEXT: ret void @@ -154,9 +154,9 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 @@ -178,7 +178,7 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK2-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR5:[0-9]+]] +// CHECK2-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR6:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 2) // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK2-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0]] to i8* @@ -217,7 +217,7 @@ // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 4 -// CHECK2-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR5]] +// CHECK2-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR6]] // CHECK2-NEXT: ret void // // @@ -228,9 +228,9 @@ // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK2-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 @@ -251,7 +251,7 @@ // CHECK2-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load i32**, i32*** [[C_ADDR]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP0]], align 4 -// CHECK2-NEXT: call void @_Z4workPi(i32* [[TMP1]]) #[[ATTR5]] +// CHECK2-NEXT: call void @_Z4workPi(i32* [[TMP1]]) #[[ATTR6]] // CHECK2-NEXT: ret void // // @@ -265,7 +265,7 @@ // CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to i8* // CHECK2-NEXT: [[TMP2:%.*]] = bitcast i32* [[ATOMIC_TEMP]] to i8* -// CHECK2-NEXT: call void @__atomic_load(i32 4, i8* [[TMP1]], i8* [[TMP2]], i32 0) #[[ATTR5]] +// CHECK2-NEXT: call void @__atomic_load(i32 4, i8* [[TMP1]], i8* [[TMP2]], i32 0) #[[ATTR6]] // CHECK2-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK2: atomic_cont: // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[ATOMIC_TEMP]], align 4 @@ -274,7 +274,7 @@ // CHECK2-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0]] to i8* // CHECK2-NEXT: [[TMP5:%.*]] = bitcast i32* [[ATOMIC_TEMP]] to i8* // CHECK2-NEXT: [[TMP6:%.*]] = bitcast i32* [[ATOMIC_TEMP1]] to i8* -// CHECK2-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i32 4, i8* [[TMP4]], i8* [[TMP5]], i8* [[TMP6]], i32 0, i32 0) #[[ATTR5]] +// CHECK2-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i32 4, i8* [[TMP4]], i8* [[TMP5]], i8* [[TMP6]], i32 0, i32 0) #[[ATTR6]] // CHECK2-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK2: atomic_exit: // CHECK2-NEXT: ret void @@ -287,9 +287,9 @@ // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK2-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 @@ -311,7 +311,7 @@ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR5:[0-9]+]] +// CHECK3-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR6:[0-9]+]] // CHECK3-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 2) // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0]] to i8* @@ -350,7 +350,7 @@ // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 4 -// CHECK3-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR5]] +// CHECK3-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR6]] // CHECK3-NEXT: ret void // // @@ -361,9 +361,9 @@ // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK3-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 @@ -384,7 +384,7 @@ // CHECK3-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i32**, i32*** [[C_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP0]], align 4 -// CHECK3-NEXT: call void @_Z4workPi(i32* [[TMP1]]) #[[ATTR5]] +// CHECK3-NEXT: call void @_Z4workPi(i32* [[TMP1]]) #[[ATTR6]] // CHECK3-NEXT: ret void // // @@ -398,7 +398,7 @@ // CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to i8* // CHECK3-NEXT: [[TMP2:%.*]] = bitcast i32* [[ATOMIC_TEMP]] to i8* -// CHECK3-NEXT: call void @__atomic_load(i32 4, i8* [[TMP1]], i8* [[TMP2]], i32 0) #[[ATTR5]] +// CHECK3-NEXT: call void @__atomic_load(i32 4, i8* [[TMP1]], i8* [[TMP2]], i32 0) #[[ATTR6]] // CHECK3-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK3: atomic_cont: // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[ATOMIC_TEMP]], align 4 @@ -407,7 +407,7 @@ // CHECK3-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0]] to i8* // CHECK3-NEXT: [[TMP5:%.*]] = bitcast i32* [[ATOMIC_TEMP]] to i8* // CHECK3-NEXT: [[TMP6:%.*]] = bitcast i32* [[ATOMIC_TEMP1]] to i8* -// CHECK3-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i32 4, i8* [[TMP4]], i8* [[TMP5]], i8* [[TMP6]], i32 0, i32 0) #[[ATTR5]] +// CHECK3-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i32 4, i8* [[TMP4]], i8* [[TMP5]], i8* [[TMP6]], i32 0, i32 0) #[[ATTR6]] // CHECK3-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK3: atomic_exit: // CHECK3-NEXT: ret void @@ -420,9 +420,9 @@ // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK3-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 diff --git a/clang/test/OpenMP/nvptx_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_codegen.cpp --- a/clang/test/OpenMP/nvptx_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_parallel_codegen.cpp @@ -1485,9 +1485,9 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: ret void @@ -1512,9 +1512,9 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] // CHECK1-NEXT: ret void @@ -1539,9 +1539,9 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] // CHECK1-NEXT: ret void @@ -1613,9 +1613,9 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK1-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] // CHECK1-NEXT: ret void @@ -1701,9 +1701,9 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 @@ -1760,9 +1760,9 @@ // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR1:[0-9]+]] // CHECK2-NEXT: ret void @@ -1787,9 +1787,9 @@ // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR1]] // CHECK2-NEXT: ret void @@ -1814,9 +1814,9 @@ // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR1]] // CHECK2-NEXT: ret void @@ -1886,9 +1886,9 @@ // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK2-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR1]] // CHECK2-NEXT: ret void @@ -1973,9 +1973,9 @@ // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK2-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 diff --git a/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp --- a/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp @@ -584,15 +584,15 @@ // // // CHECK-LABEL: define {{[^@]+}}@__omp_outlined___wrapper -// CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 @@ -601,6 +601,6 @@ // CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 1 // CHECK-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32** // CHECK-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 -// CHECK-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP5]], i32* [[TMP8]]) #[[ATTR1:[0-9]+]] +// CHECK-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP5]], i32* [[TMP8]]) #[[ATTR2:[0-9]+]] // CHECK-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_target_codegen.cpp b/clang/test/OpenMP/nvptx_target_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_codegen.cpp @@ -516,9 +516,9 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 @@ -899,9 +899,9 @@ // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK2-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 @@ -1282,9 +1282,9 @@ // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK3-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp --- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp @@ -35,12 +35,12 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 true, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8:![0-9]+]] // CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR1:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) @@ -73,7 +73,7 @@ // CHECK1-NEXT: [[PARTIAL_SUM:%.*]] = call i8* @__kmpc_alloc_shared(i64 8) // CHECK1-NEXT: [[PARTIAL_SUM_ON_STACK:%.*]] = bitcast i8* [[PARTIAL_SUM]] to %"class.std::complex"* // CHECK1-NEXT: [[TMP0:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP0]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP0]]) // CHECK1-NEXT: [[TMP1:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP1]]) #[[ATTR1]] // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] @@ -579,9 +579,9 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2, !tbaa [[TBAA19]] // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 @@ -602,12 +602,12 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR1]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) @@ -1160,9 +1160,9 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2, !tbaa [[TBAA19]] // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 @@ -1269,12 +1269,12 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 true, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8:![0-9]+]] // CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR1:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) @@ -1307,7 +1307,7 @@ // CHECK2-NEXT: [[PARTIAL_SUM:%.*]] = call i8* @__kmpc_alloc_shared(i64 8) // CHECK2-NEXT: [[PARTIAL_SUM_ON_STACK:%.*]] = bitcast i8* [[PARTIAL_SUM]] to %"class.std::complex"* // CHECK2-NEXT: [[TMP0:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP0]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP0]]) // CHECK2-NEXT: [[TMP1:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* // CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP1]]) #[[ATTR1]] // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] @@ -1422,7 +1422,7 @@ // CHECK2-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load float*, float** [[__RE_ADDR]], align 8 // CHECK2-NEXT: [[TMP1:%.*]] = load float*, float** [[__IM_ADDR]], align 8 -// CHECK2-NEXT: call void @_ZNSt7complexIfEC2ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS1]], float* nonnull align 4 dereferenceable(4) [[TMP0]], float* nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR9]] +// CHECK2-NEXT: call void @_ZNSt7complexIfEC2ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS1]], float* nonnull align 4 dereferenceable(4) [[TMP0]], float* nonnull align 4 dereferenceable(4) [[TMP1]]) // CHECK2-NEXT: ret void // // @@ -1813,9 +1813,9 @@ // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2, !tbaa [[TBAA19]] // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK2-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 @@ -1836,12 +1836,12 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR1]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) @@ -2394,9 +2394,9 @@ // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2, !tbaa [[TBAA19]] // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK2-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 @@ -2503,12 +2503,12 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 true, i1 true) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8:![0-9]+]] // CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR1:[0-9]+]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) @@ -2541,7 +2541,7 @@ // CHECK3-NEXT: [[PARTIAL_SUM:%.*]] = call i8* @__kmpc_alloc_shared(i64 8) // CHECK3-NEXT: [[PARTIAL_SUM_ON_STACK:%.*]] = bitcast i8* [[PARTIAL_SUM]] to %"class.std::complex"* // CHECK3-NEXT: [[TMP0:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP0]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP0]]) // CHECK3-NEXT: [[TMP1:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP1]]) #[[ATTR1]] // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] @@ -2656,7 +2656,7 @@ // CHECK3-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[TMP0:%.*]] = load float*, float** [[__RE_ADDR]], align 8 // CHECK3-NEXT: [[TMP1:%.*]] = load float*, float** [[__IM_ADDR]], align 8 -// CHECK3-NEXT: call void @_ZNSt7complexIfEC2ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS1]], float* nonnull align 4 dereferenceable(4) [[TMP0]], float* nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR9]] +// CHECK3-NEXT: call void @_ZNSt7complexIfEC2ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS1]], float* nonnull align 4 dereferenceable(4) [[TMP0]], float* nonnull align 4 dereferenceable(4) [[TMP1]]) // CHECK3-NEXT: ret void // // @@ -3047,9 +3047,9 @@ // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2, !tbaa [[TBAA19]] // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK3-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 @@ -3070,12 +3070,12 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR1]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) @@ -3628,9 +3628,9 @@ // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2, !tbaa [[TBAA19]] // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK3-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 diff --git a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp @@ -56,7 +56,6 @@ // CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i8* // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 true, i1 true) @@ -68,6 +67,7 @@ // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i8* // CHECK1-NEXT: store i8 [[TMP2]], i8* [[CONV1]], align 1 // CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[A_CASTED]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR1:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) @@ -97,7 +97,6 @@ // CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) @@ -109,6 +108,7 @@ // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR1]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) @@ -138,7 +138,6 @@ // CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 true) @@ -150,6 +149,7 @@ // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR1]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true) @@ -221,7 +221,6 @@ // CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[A_ADDR]] to i8* // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 true, i1 true) @@ -233,6 +232,7 @@ // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i32* [[A_CASTED]] to i8* // CHECK2-NEXT: store i8 [[TMP2]], i8* [[CONV1]], align 1 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_CASTED]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR1:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) @@ -262,7 +262,6 @@ // CHECK2-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) @@ -274,6 +273,7 @@ // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR1]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) @@ -303,7 +303,6 @@ // CHECK2-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 true) @@ -315,6 +314,7 @@ // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR1]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true) @@ -386,7 +386,6 @@ // CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[A_ADDR]] to i8* // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 true, i1 true) @@ -398,6 +397,7 @@ // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[A_CASTED]] to i8* // CHECK3-NEXT: store i8 [[TMP2]], i8* [[CONV1]], align 1 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_CASTED]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR1:[0-9]+]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) @@ -427,7 +427,6 @@ // CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) @@ -439,6 +438,7 @@ // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR1]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) @@ -468,7 +468,6 @@ // CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 true) @@ -480,6 +479,7 @@ // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR1]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true) diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp @@ -579,14 +579,14 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 true, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR1:[0-9]+]] +// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -684,15 +684,15 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 // CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** // CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8 -// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR1]] +// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -701,14 +701,14 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 true, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR1:[0-9]+]] +// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -806,15 +806,15 @@ // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK2-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 // CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** // CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR1]] +// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR4]] // CHECK2-NEXT: ret void // // @@ -823,14 +823,14 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 true, i1 true) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR1:[0-9]+]] +// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4:[0-9]+]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -928,14 +928,14 @@ // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK3-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 // CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** // CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR1]] +// CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR4]] // CHECK3-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp @@ -18477,7 +18477,6 @@ // CHECK1-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8 @@ -18497,8 +18496,9 @@ // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[L_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[L_CASTED]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i32]* [[TMP0]], i64 [[TMP6]]) #[[ATTR1:[0-9]+]] +// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i32]* [[TMP0]], i64 [[TMP6]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -18795,14 +18795,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34 -// CHECK1-SAME: (i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-SAME: (i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8 // CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK1-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* @@ -18816,8 +18815,9 @@ // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR1]] +// CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -19059,7 +19059,6 @@ // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) @@ -19067,8 +19066,9 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR1]] +// CHECK1-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -19248,7 +19248,6 @@ // CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 // CHECK1-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 @@ -19262,8 +19261,9 @@ // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[F_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[F_CASTED]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i64 [[TMP4]]) #[[ATTR1]] +// CHECK1-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i64 [[TMP4]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -19481,7 +19481,6 @@ // CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK1-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* @@ -19495,8 +19494,9 @@ // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [10 x [10 x i32]]* [[TMP0]]) #[[ATTR1]] +// CHECK1-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [10 x [10 x i32]]* [[TMP0]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -19799,7 +19799,6 @@ // CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 // CHECK1-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 8 @@ -19815,8 +19814,9 @@ // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[V_ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i32]* [[TMP0]], i32* [[TMP5]]) #[[ATTR1]] +// CHECK1-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i32]* [[TMP0]], i32* [[TMP5]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -20071,7 +20071,6 @@ // CHECK2-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 // CHECK2-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8 @@ -20091,8 +20090,9 @@ // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[L_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[L_CASTED]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i32]* [[TMP0]], i64 [[TMP6]]) #[[ATTR1:[0-9]+]] +// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i32]* [[TMP0]], i64 [[TMP6]]) #[[ATTR2:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -20389,14 +20389,13 @@ // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34 -// CHECK2-SAME: (i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK2-SAME: (i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8 // CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK2-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* @@ -20410,8 +20409,9 @@ // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR1]] +// CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -20653,7 +20653,6 @@ // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) @@ -20661,8 +20660,9 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR1]] +// CHECK2-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -20842,7 +20842,6 @@ // CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 // CHECK2-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 @@ -20856,8 +20855,9 @@ // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[F_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[F_CASTED]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i64 [[TMP4]]) #[[ATTR1]] +// CHECK2-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i64 [[TMP4]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -21075,7 +21075,6 @@ // CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK2-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* @@ -21089,8 +21088,9 @@ // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [10 x [10 x i32]]* [[TMP0]]) #[[ATTR1]] +// CHECK2-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [10 x [10 x i32]]* [[TMP0]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -21388,7 +21388,6 @@ // CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 // CHECK2-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 8 @@ -21404,8 +21403,9 @@ // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 // CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[V_ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i32]* [[TMP0]], i32* [[TMP5]]) #[[ATTR1]] +// CHECK2-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i32]* [[TMP0]], i32* [[TMP5]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -21660,7 +21660,6 @@ // CHECK3-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 @@ -21676,8 +21675,9 @@ // CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[L_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP5]], i32* [[L_CASTED]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[L_CASTED]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i32]* [[TMP0]], i32 [[TMP6]]) #[[ATTR1:[0-9]+]] +// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i32]* [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -21961,14 +21961,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34 -// CHECK3-SAME: (i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 // CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 @@ -21980,8 +21979,9 @@ // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR1]] +// CHECK3-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -22214,7 +22214,6 @@ // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) @@ -22222,8 +22221,9 @@ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR1]] +// CHECK3-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -22397,7 +22397,6 @@ // CHECK3-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 // CHECK3-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 @@ -22409,8 +22408,9 @@ // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[F_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP3]], i32* [[F_CASTED]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[F_CASTED]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i32 [[TMP4]]) #[[ATTR1]] +// CHECK3-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i32 [[TMP4]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -22618,7 +22618,6 @@ // CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 @@ -22630,8 +22629,9 @@ // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [10 x [10 x i32]]* [[TMP0]]) #[[ATTR1]] +// CHECK3-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [10 x [10 x i32]]* [[TMP0]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -22934,7 +22934,6 @@ // CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 // CHECK3-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 4 @@ -22948,8 +22947,9 @@ // CHECK3-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[V_ADDR]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i32]* [[TMP0]], i32* [[TMP5]]) #[[ATTR1]] +// CHECK3-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i32]* [[TMP0]], i32* [[TMP5]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -23194,7 +23194,6 @@ // CHECK4-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK4-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 @@ -23210,8 +23209,9 @@ // CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[L_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP5]], i32* [[L_CASTED]], align 4 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[L_CASTED]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK4-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i32]* [[TMP0]], i32 [[TMP6]]) #[[ATTR1:[0-9]+]] +// CHECK4-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i32]* [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]] // CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) // CHECK4-NEXT: ret void // CHECK4: worker.exit: @@ -23495,14 +23495,13 @@ // // // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34 -// CHECK4-SAME: (i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK4-SAME: (i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 // CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK4-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 // CHECK4-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 @@ -23514,8 +23513,9 @@ // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK4-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR1]] +// CHECK4-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] // CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) // CHECK4-NEXT: ret void // CHECK4: worker.exit: @@ -23748,7 +23748,6 @@ // CHECK4-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK4-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 // CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) @@ -23756,8 +23755,9 @@ // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK4: user_code.entry: // CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) +// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK4-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR1]] +// CHECK4-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] // CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) // CHECK4-NEXT: ret void // CHECK4: worker.exit: @@ -23931,7 +23931,6 @@ // CHECK4-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 // CHECK4-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 // CHECK4-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 @@ -23943,8 +23942,9 @@ // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[F_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP3]], i32* [[F_CASTED]], align 4 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[F_CASTED]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK4-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i32 [[TMP4]]) #[[ATTR1]] +// CHECK4-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i32 [[TMP4]]) #[[ATTR2]] // CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) // CHECK4-NEXT: ret void // CHECK4: worker.exit: @@ -24152,7 +24152,6 @@ // CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK4-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 // CHECK4-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 @@ -24164,8 +24163,9 @@ // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK4-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [10 x [10 x i32]]* [[TMP0]]) #[[ATTR1]] +// CHECK4-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [10 x [10 x i32]]* [[TMP0]]) #[[ATTR2]] // CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) // CHECK4-NEXT: ret void // CHECK4: worker.exit: @@ -24468,7 +24468,6 @@ // CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK4-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 // CHECK4-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 4 @@ -24482,8 +24481,9 @@ // CHECK4-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 // CHECK4-NEXT: [[TMP5:%.*]] = load i32*, i32** [[V_ADDR]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK4-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i32]* [[TMP0]], i32* [[TMP5]]) #[[ATTR1]] +// CHECK4-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i32]* [[TMP0]], i32* [[TMP5]]) #[[ATTR2]] // CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) // CHECK4-NEXT: ret void // CHECK4: worker.exit: diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp @@ -39,7 +39,6 @@ // CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8 // CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -59,6 +58,7 @@ // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], i32* [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) @@ -304,10 +304,10 @@ // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I7]], align 4 -// CHECK1-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I7]]) #[[ATTR4:[0-9]+]] -// CHECK1-NEXT: [[CALL13:%.*]] = call i32 @_Z3fooPi(i32* [[TMP0]]) #[[ATTR4]] +// CHECK1-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I7]]) #[[ATTR5:[0-9]+]] +// CHECK1-NEXT: [[CALL13:%.*]] = call i32 @_Z3fooPi(i32* [[TMP0]]) #[[ATTR5]] // CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[CALL]], [[CALL13]] -// CHECK1-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[CONV]]) #[[ATTR4]] +// CHECK1-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[CONV]]) #[[ATTR5]] // CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD14]], [[CALL15]] // CHECK1-NEXT: store i32 [[ADD16]], i32* [[TMP0]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] @@ -349,7 +349,6 @@ // CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 // CHECK2-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -365,6 +364,7 @@ // CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], i32* [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) @@ -598,10 +598,10 @@ // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK2-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I4]]) #[[ATTR4:[0-9]+]] -// CHECK2-NEXT: [[CALL8:%.*]] = call i32 @_Z3fooPi(i32* [[TMP0]]) #[[ATTR4]] +// CHECK2-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I4]]) #[[ATTR5:[0-9]+]] +// CHECK2-NEXT: [[CALL8:%.*]] = call i32 @_Z3fooPi(i32* [[TMP0]]) #[[ATTR5]] // CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[CALL]], [[CALL8]] -// CHECK2-NEXT: [[CALL10:%.*]] = call i32 @_Z3fooPi(i32* [[ARGC_ADDR]]) #[[ATTR4]] +// CHECK2-NEXT: [[CALL10:%.*]] = call i32 @_Z3fooPi(i32* [[ARGC_ADDR]]) #[[ATTR5]] // CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD9]], [[CALL10]] // CHECK2-NEXT: store i32 [[ADD11]], i32* [[TMP0]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] @@ -643,7 +643,6 @@ // CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 // CHECK3-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -659,6 +658,7 @@ // CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], i32* [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) @@ -892,10 +892,10 @@ // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK3-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I4]]) #[[ATTR4:[0-9]+]] -// CHECK3-NEXT: [[CALL8:%.*]] = call i32 @_Z3fooPi(i32* [[TMP0]]) #[[ATTR4]] +// CHECK3-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I4]]) #[[ATTR5:[0-9]+]] +// CHECK3-NEXT: [[CALL8:%.*]] = call i32 @_Z3fooPi(i32* [[TMP0]]) #[[ATTR5]] // CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[CALL]], [[CALL8]] -// CHECK3-NEXT: [[CALL10:%.*]] = call i32 @_Z3fooPi(i32* [[ARGC_ADDR]]) #[[ATTR4]] +// CHECK3-NEXT: [[CALL10:%.*]] = call i32 @_Z3fooPi(i32* [[ARGC_ADDR]]) #[[ATTR5]] // CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD9]], [[CALL10]] // CHECK3-NEXT: store i32 [[ADD11]], i32* [[TMP0]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] @@ -937,7 +937,6 @@ // CHECK4-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8 // CHECK4-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK4-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -957,6 +956,7 @@ // CHECK4-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK4-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK4-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 +// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK4-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], i32* [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]] // CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) @@ -1202,10 +1202,10 @@ // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[I7]], align 4 -// CHECK4-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I7]]) #[[ATTR4:[0-9]+]] -// CHECK4-NEXT: [[CALL13:%.*]] = call i32 @_Z3fooPi(i32* [[TMP0]]) #[[ATTR4]] +// CHECK4-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I7]]) #[[ATTR5:[0-9]+]] +// CHECK4-NEXT: [[CALL13:%.*]] = call i32 @_Z3fooPi(i32* [[TMP0]]) #[[ATTR5]] // CHECK4-NEXT: [[ADD14:%.*]] = add nsw i32 [[CALL]], [[CALL13]] -// CHECK4-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[CONV]]) #[[ATTR4]] +// CHECK4-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[CONV]]) #[[ATTR5]] // CHECK4-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD14]], [[CALL15]] // CHECK4-NEXT: store i32 [[ADD16]], i32* [[TMP0]], align 4 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] @@ -1247,7 +1247,6 @@ // CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK5-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 // CHECK5-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK5-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -1263,6 +1262,7 @@ // CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 // CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 +// CHECK5-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK5-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK5-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], i32* [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]] // CHECK5-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) @@ -1496,10 +1496,10 @@ // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK5-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I4]]) #[[ATTR4:[0-9]+]] -// CHECK5-NEXT: [[CALL8:%.*]] = call i32 @_Z3fooPi(i32* [[TMP0]]) #[[ATTR4]] +// CHECK5-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I4]]) #[[ATTR5:[0-9]+]] +// CHECK5-NEXT: [[CALL8:%.*]] = call i32 @_Z3fooPi(i32* [[TMP0]]) #[[ATTR5]] // CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[CALL]], [[CALL8]] -// CHECK5-NEXT: [[CALL10:%.*]] = call i32 @_Z3fooPi(i32* [[ARGC_ADDR]]) #[[ATTR4]] +// CHECK5-NEXT: [[CALL10:%.*]] = call i32 @_Z3fooPi(i32* [[ARGC_ADDR]]) #[[ATTR5]] // CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD9]], [[CALL10]] // CHECK5-NEXT: store i32 [[ADD11]], i32* [[TMP0]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] @@ -1541,7 +1541,6 @@ // CHECK6-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK6-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 // CHECK6-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK6-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -1557,6 +1556,7 @@ // CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 // CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 +// CHECK6-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK6-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK6-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], i32* [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]] // CHECK6-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) @@ -1790,10 +1790,10 @@ // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK6-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK6-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I4]]) #[[ATTR4:[0-9]+]] -// CHECK6-NEXT: [[CALL8:%.*]] = call i32 @_Z3fooPi(i32* [[TMP0]]) #[[ATTR4]] +// CHECK6-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I4]]) #[[ATTR5:[0-9]+]] +// CHECK6-NEXT: [[CALL8:%.*]] = call i32 @_Z3fooPi(i32* [[TMP0]]) #[[ATTR5]] // CHECK6-NEXT: [[ADD9:%.*]] = add nsw i32 [[CALL]], [[CALL8]] -// CHECK6-NEXT: [[CALL10:%.*]] = call i32 @_Z3fooPi(i32* [[ARGC_ADDR]]) #[[ATTR4]] +// CHECK6-NEXT: [[CALL10:%.*]] = call i32 @_Z3fooPi(i32* [[ARGC_ADDR]]) #[[ATTR5]] // CHECK6-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD9]], [[CALL10]] // CHECK6-NEXT: store i32 [[ADD11]], i32* [[TMP0]], align 4 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp @@ -9390,7 +9390,6 @@ // CHECK1-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8 @@ -9410,8 +9409,9 @@ // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[L_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[L_CASTED]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i32]* [[TMP0]], i64 [[TMP6]]) #[[ATTR1:[0-9]+]] +// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i32]* [[TMP0]], i64 [[TMP6]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -9732,14 +9732,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32 -// CHECK1-SAME: (i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-SAME: (i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8 // CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK1-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* @@ -9753,8 +9752,9 @@ // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR1]] +// CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -10020,7 +10020,6 @@ // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) @@ -10028,8 +10027,9 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR1]] +// CHECK1-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -10223,7 +10223,6 @@ // CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 // CHECK1-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 @@ -10237,8 +10236,9 @@ // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[F_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[F_CASTED]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i64 [[TMP4]]) #[[ATTR1]] +// CHECK1-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i64 [[TMP4]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -10474,7 +10474,6 @@ // CHECK2-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 // CHECK2-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 @@ -10490,8 +10489,9 @@ // CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[L_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP5]], i32* [[L_CASTED]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[L_CASTED]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i32]* [[TMP0]], i32 [[TMP6]]) #[[ATTR1:[0-9]+]] +// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i32]* [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -10799,14 +10799,13 @@ // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32 -// CHECK2-SAME: (i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK2-SAME: (i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 // CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK2-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 @@ -10818,8 +10817,9 @@ // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR1]] +// CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -11076,7 +11076,6 @@ // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) @@ -11084,8 +11083,9 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR1]] +// CHECK2-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -11273,7 +11273,6 @@ // CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 // CHECK2-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 @@ -11285,8 +11284,9 @@ // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[F_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP3]], i32* [[F_CASTED]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[F_CASTED]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i32 [[TMP4]]) #[[ATTR1]] +// CHECK2-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i32 [[TMP4]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -11512,7 +11512,6 @@ // CHECK3-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 @@ -11528,8 +11527,9 @@ // CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[L_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP5]], i32* [[L_CASTED]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[L_CASTED]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i32]* [[TMP0]], i32 [[TMP6]]) #[[ATTR1:[0-9]+]] +// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i32]* [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -11837,14 +11837,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32 -// CHECK3-SAME: (i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 // CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 @@ -11856,8 +11855,9 @@ // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR1]] +// CHECK3-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -12114,7 +12114,6 @@ // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) @@ -12122,8 +12121,9 @@ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR1]] +// CHECK3-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -12311,7 +12311,6 @@ // CHECK3-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 // CHECK3-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 @@ -12323,8 +12322,9 @@ // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[F_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP3]], i32* [[F_CASTED]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[F_CASTED]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i32 [[TMP4]]) #[[ATTR1]] +// CHECK3-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i32 [[TMP4]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) // CHECK3-NEXT: ret void // CHECK3: worker.exit: diff --git a/clang/test/OpenMP/nvptx_teams_codegen.cpp b/clang/test/OpenMP/nvptx_teams_codegen.cpp --- a/clang/test/OpenMP/nvptx_teams_codegen.cpp +++ b/clang/test/OpenMP/nvptx_teams_codegen.cpp @@ -896,7 +896,6 @@ // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 true, i1 true) @@ -908,8 +907,9 @@ // CHECK1-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i32* // CHECK1-NEXT: store i32 [[TMP1]], i32* [[ARGC_ON_STACK]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR1:[0-9]+]] +// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i64 4) // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) // CHECK1-NEXT: ret void @@ -937,7 +937,6 @@ // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -948,8 +947,9 @@ // CHECK1-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i8*** // CHECK1-NEXT: store i8** [[TMP1]], i8*** [[ARGC_ON_STACK]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR1]] +// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i64 8) // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) // CHECK1-NEXT: ret void @@ -977,7 +977,6 @@ // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 true, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -988,8 +987,9 @@ // CHECK2-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i32* // CHECK2-NEXT: store i32 [[TMP1]], i32* [[ARGC_ON_STACK]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR1:[0-9]+]] +// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR2:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i32 4) // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) // CHECK2-NEXT: ret void @@ -1017,7 +1017,6 @@ // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -1028,8 +1027,9 @@ // CHECK2-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i8*** // CHECK2-NEXT: store i8** [[TMP1]], i8*** [[ARGC_ON_STACK]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR1]] +// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i32 4) // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) // CHECK2-NEXT: ret void @@ -1059,7 +1059,6 @@ // CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK3-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK3-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8 @@ -1075,8 +1074,9 @@ // CHECK3-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC3]] to i32* // CHECK3-NEXT: store i32 [[TMP1]], i32* [[ARGC_ON_STACK]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR1:[0-9]+]] +// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR2:[0-9]+]] // CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[ARGC3]], i64 4) // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) // CHECK3-NEXT: ret void @@ -1106,7 +1106,6 @@ // CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 8 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK3-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK3-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 8 @@ -1121,8 +1120,9 @@ // CHECK3-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC2]] to i8*** // CHECK3-NEXT: store i8** [[TMP1]], i8*** [[ARGC_ON_STACK]], align 8 // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR1]] +// CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[ARGC2]], i64 8) // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) // CHECK3-NEXT: ret void @@ -1152,7 +1152,6 @@ // CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK4-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 @@ -1165,8 +1164,9 @@ // CHECK4-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i32* // CHECK4-NEXT: store i32 [[TMP1]], i32* [[ARGC_ON_STACK]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK4-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR1:[0-9]+]] +// CHECK4-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR2:[0-9]+]] // CHECK4-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i32 4) // CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) // CHECK4-NEXT: ret void @@ -1196,7 +1196,6 @@ // CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 4 // CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK4-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 4 @@ -1209,8 +1208,9 @@ // CHECK4-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i8*** // CHECK4-NEXT: store i8** [[TMP1]], i8*** [[ARGC_ON_STACK]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK4-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR1]] +// CHECK4-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR2]] // CHECK4-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i32 4) // CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) // CHECK4-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp --- a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp +++ b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp @@ -4212,7 +4212,6 @@ // CHECK1-NEXT: [[E_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i64 [[E]], i64* [[E_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[E_ADDR]] to double* // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 true, i1 true) @@ -4224,8 +4223,9 @@ // CHECK1-NEXT: [[E_ON_STACK:%.*]] = bitcast i8* [[E1]] to double* // CHECK1-NEXT: store double [[TMP1]], double* [[E_ON_STACK]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], double* [[E_ON_STACK]]) #[[ATTR1:[0-9]+]] +// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], double* [[E_ON_STACK]]) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[E1]], i64 8) // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) // CHECK1-NEXT: ret void @@ -4323,7 +4323,7 @@ // CHECK1: then: // CHECK1-NEXT: [[TMP35:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* // CHECK1-NEXT: [[TMP36:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP35]], i8* [[TMP36]]) #[[ATTR1]] +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP35]], i8* [[TMP36]]) #[[ATTR4]] // CHECK1-NEXT: br label [[IFCONT:%.*]] // CHECK1: else: // CHECK1-NEXT: br label [[IFCONT]] @@ -4453,7 +4453,7 @@ // CHECK1-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 8 // CHECK1-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* // CHECK1-NEXT: [[TMP10:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP9]], i8* [[TMP10]]) #[[ATTR1]] +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP9]], i8* [[TMP10]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -4501,7 +4501,7 @@ // CHECK1-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 8 // CHECK1-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* // CHECK1-NEXT: [[TMP10:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP10]], i8* [[TMP9]]) #[[ATTR1]] +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP10]], i8* [[TMP9]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -4512,7 +4512,6 @@ // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i64 [[C]], i64* [[C_ADDR]], align 8 // CHECK1-NEXT: store i64 [[D]], i64* [[D_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[C_ADDR]] to i8* @@ -4529,8 +4528,9 @@ // CHECK1-NEXT: [[D_ON_STACK:%.*]] = bitcast i8* [[D3]] to float* // CHECK1-NEXT: store float [[TMP2]], float* [[D_ON_STACK]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP3]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8* [[C2]], float* [[D_ON_STACK]]) #[[ATTR1]] +// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8* [[C2]], float* [[D_ON_STACK]]) #[[ATTR4]] // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[D3]], i64 4) // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[C2]], i64 1) // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) @@ -4664,7 +4664,7 @@ // CHECK1: then: // CHECK1-NEXT: [[TMP46:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* // CHECK1-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP46]], i8* [[TMP47]]) #[[ATTR1]] +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP46]], i8* [[TMP47]]) #[[ATTR4]] // CHECK1-NEXT: br label [[IFCONT:%.*]] // CHECK1: else: // CHECK1-NEXT: br label [[IFCONT]] @@ -4825,7 +4825,7 @@ // CHECK1-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 8 // CHECK1-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* // CHECK1-NEXT: [[TMP12:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP11]], i8* [[TMP12]]) #[[ATTR1]] +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP11]], i8* [[TMP12]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -4883,7 +4883,7 @@ // CHECK1-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 8 // CHECK1-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* // CHECK1-NEXT: [[TMP12:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP12]], i8* [[TMP11]]) #[[ATTR1]] +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP12]], i8* [[TMP11]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -4894,7 +4894,6 @@ // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* @@ -4904,8 +4903,9 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[CONV]], i16* [[CONV1]]) #[[ATTR1]] +// CHECK1-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[CONV]], i16* [[CONV1]]) #[[ATTR4]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -5118,7 +5118,7 @@ // CHECK1: then: // CHECK1-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* // CHECK1-NEXT: [[TMP48:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func11"(i8* [[TMP47]], i8* [[TMP48]]) #[[ATTR1]] +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func11"(i8* [[TMP47]], i8* [[TMP48]]) #[[ATTR4]] // CHECK1-NEXT: br label [[IFCONT:%.*]] // CHECK1: else: // CHECK1-NEXT: br label [[IFCONT]] @@ -5296,7 +5296,7 @@ // CHECK1: then: // CHECK1-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* // CHECK1-NEXT: [[TMP48:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP47]], i8* [[TMP48]]) #[[ATTR1]] +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP47]], i8* [[TMP48]]) #[[ATTR4]] // CHECK1-NEXT: br label [[IFCONT:%.*]] // CHECK1: else: // CHECK1-NEXT: br label [[IFCONT]] @@ -5463,7 +5463,7 @@ // CHECK1-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 8 // CHECK1-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* // CHECK1-NEXT: [[TMP13:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP12]], i8* [[TMP13]]) #[[ATTR1]] +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP12]], i8* [[TMP13]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -5523,7 +5523,7 @@ // CHECK1-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 8 // CHECK1-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* // CHECK1-NEXT: [[TMP13:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP13]], i8* [[TMP12]]) #[[ATTR1]] +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP13]], i8* [[TMP12]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -5534,7 +5534,6 @@ // CHECK2-NEXT: [[E1:%.*]] = alloca double, align 8 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store double* [[E]], double** [[E_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 true, i1 true) @@ -5544,8 +5543,9 @@ // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: [[TMP3:%.*]] = load double, double* [[TMP0]], align 8 // CHECK2-NEXT: store double [[TMP3]], double* [[E1]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], double* [[E1]]) #[[ATTR1:[0-9]+]] +// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], double* [[E1]]) #[[ATTR4:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -5642,7 +5642,7 @@ // CHECK2: then: // CHECK2-NEXT: [[TMP35:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* // CHECK2-NEXT: [[TMP36:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP35]], i8* [[TMP36]]) #[[ATTR1]] +// CHECK2-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP35]], i8* [[TMP36]]) #[[ATTR4]] // CHECK2-NEXT: br label [[IFCONT:%.*]] // CHECK2: else: // CHECK2-NEXT: br label [[IFCONT]] @@ -5772,7 +5772,7 @@ // CHECK2-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* // CHECK2-NEXT: [[TMP10:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP9]], i8* [[TMP10]]) #[[ATTR1]] +// CHECK2-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP9]], i8* [[TMP10]]) #[[ATTR4]] // CHECK2-NEXT: ret void // // @@ -5820,7 +5820,7 @@ // CHECK2-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* // CHECK2-NEXT: [[TMP10:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP10]], i8* [[TMP9]]) #[[ATTR1]] +// CHECK2-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP10]], i8* [[TMP9]]) #[[ATTR4]] // CHECK2-NEXT: ret void // // @@ -5831,7 +5831,6 @@ // CHECK2-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[C]], i32* [[C_ADDR]], align 4 // CHECK2-NEXT: store i32 [[D]], i32* [[D_ADDR]], align 4 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[C_ADDR]] to i8* @@ -5848,8 +5847,9 @@ // CHECK2-NEXT: [[D_ON_STACK:%.*]] = bitcast i8* [[D3]] to float* // CHECK2-NEXT: store float [[TMP2]], float* [[D_ON_STACK]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP3]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8* [[C2]], float* [[D_ON_STACK]]) #[[ATTR1]] +// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8* [[C2]], float* [[D_ON_STACK]]) #[[ATTR4]] // CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[D3]], i32 4) // CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[C2]], i32 1) // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) @@ -5983,7 +5983,7 @@ // CHECK2: then: // CHECK2-NEXT: [[TMP46:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* // CHECK2-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP46]], i8* [[TMP47]]) #[[ATTR1]] +// CHECK2-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP46]], i8* [[TMP47]]) #[[ATTR4]] // CHECK2-NEXT: br label [[IFCONT:%.*]] // CHECK2: else: // CHECK2-NEXT: br label [[IFCONT]] @@ -6144,7 +6144,7 @@ // CHECK2-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 4 // CHECK2-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* // CHECK2-NEXT: [[TMP12:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP11]], i8* [[TMP12]]) #[[ATTR1]] +// CHECK2-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP11]], i8* [[TMP12]]) #[[ATTR4]] // CHECK2-NEXT: ret void // // @@ -6202,7 +6202,7 @@ // CHECK2-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 4 // CHECK2-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* // CHECK2-NEXT: [[TMP12:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP12]], i8* [[TMP11]]) #[[ATTR1]] +// CHECK2-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP12]], i8* [[TMP11]]) #[[ATTR4]] // CHECK2-NEXT: ret void // // @@ -6213,7 +6213,6 @@ // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK2-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* @@ -6222,8 +6221,9 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[A_ADDR]], i16* [[CONV]]) #[[ATTR1]] +// CHECK2-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[A_ADDR]], i16* [[CONV]]) #[[ATTR4]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -6436,7 +6436,7 @@ // CHECK2: then: // CHECK2-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* // CHECK2-NEXT: [[TMP48:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func11"(i8* [[TMP47]], i8* [[TMP48]]) #[[ATTR1]] +// CHECK2-NEXT: call void @"_omp$reduction$reduction_func11"(i8* [[TMP47]], i8* [[TMP48]]) #[[ATTR4]] // CHECK2-NEXT: br label [[IFCONT:%.*]] // CHECK2: else: // CHECK2-NEXT: br label [[IFCONT]] @@ -6614,7 +6614,7 @@ // CHECK2: then: // CHECK2-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* // CHECK2-NEXT: [[TMP48:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP47]], i8* [[TMP48]]) #[[ATTR1]] +// CHECK2-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP47]], i8* [[TMP48]]) #[[ATTR4]] // CHECK2-NEXT: br label [[IFCONT:%.*]] // CHECK2: else: // CHECK2-NEXT: br label [[IFCONT]] @@ -6781,7 +6781,7 @@ // CHECK2-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 4 // CHECK2-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* // CHECK2-NEXT: [[TMP13:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP12]], i8* [[TMP13]]) #[[ATTR1]] +// CHECK2-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP12]], i8* [[TMP13]]) #[[ATTR4]] // CHECK2-NEXT: ret void // // @@ -6841,7 +6841,7 @@ // CHECK2-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 4 // CHECK2-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* // CHECK2-NEXT: [[TMP13:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP13]], i8* [[TMP12]]) #[[ATTR1]] +// CHECK2-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP13]], i8* [[TMP12]]) #[[ATTR4]] // CHECK2-NEXT: ret void // // @@ -6852,7 +6852,6 @@ // CHECK3-NEXT: [[E1:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store double* [[E]], double** [[E_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 true, i1 true) @@ -6862,8 +6861,9 @@ // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: [[TMP3:%.*]] = load double, double* [[TMP0]], align 8 // CHECK3-NEXT: store double [[TMP3]], double* [[E1]], align 8 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], double* [[E1]]) #[[ATTR1:[0-9]+]] +// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], double* [[E1]]) #[[ATTR4:[0-9]+]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -6960,7 +6960,7 @@ // CHECK3: then: // CHECK3-NEXT: [[TMP35:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* // CHECK3-NEXT: [[TMP36:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP35]], i8* [[TMP36]]) #[[ATTR1]] +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP35]], i8* [[TMP36]]) #[[ATTR4]] // CHECK3-NEXT: br label [[IFCONT:%.*]] // CHECK3: else: // CHECK3-NEXT: br label [[IFCONT]] @@ -7090,7 +7090,7 @@ // CHECK3-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* // CHECK3-NEXT: [[TMP10:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP9]], i8* [[TMP10]]) #[[ATTR1]] +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP9]], i8* [[TMP10]]) #[[ATTR4]] // CHECK3-NEXT: ret void // // @@ -7138,7 +7138,7 @@ // CHECK3-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* // CHECK3-NEXT: [[TMP10:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP10]], i8* [[TMP9]]) #[[ATTR1]] +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP10]], i8* [[TMP9]]) #[[ATTR4]] // CHECK3-NEXT: ret void // // @@ -7149,7 +7149,6 @@ // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[C]], i32* [[C_ADDR]], align 4 // CHECK3-NEXT: store i32 [[D]], i32* [[D_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[C_ADDR]] to i8* @@ -7166,8 +7165,9 @@ // CHECK3-NEXT: [[D_ON_STACK:%.*]] = bitcast i8* [[D3]] to float* // CHECK3-NEXT: store float [[TMP2]], float* [[D_ON_STACK]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP3]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8* [[C2]], float* [[D_ON_STACK]]) #[[ATTR1]] +// CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8* [[C2]], float* [[D_ON_STACK]]) #[[ATTR4]] // CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[D3]], i32 4) // CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[C2]], i32 1) // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) @@ -7301,7 +7301,7 @@ // CHECK3: then: // CHECK3-NEXT: [[TMP46:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* // CHECK3-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP46]], i8* [[TMP47]]) #[[ATTR1]] +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP46]], i8* [[TMP47]]) #[[ATTR4]] // CHECK3-NEXT: br label [[IFCONT:%.*]] // CHECK3: else: // CHECK3-NEXT: br label [[IFCONT]] @@ -7462,7 +7462,7 @@ // CHECK3-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* // CHECK3-NEXT: [[TMP12:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP11]], i8* [[TMP12]]) #[[ATTR1]] +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP11]], i8* [[TMP12]]) #[[ATTR4]] // CHECK3-NEXT: ret void // // @@ -7520,7 +7520,7 @@ // CHECK3-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* // CHECK3-NEXT: [[TMP12:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP12]], i8* [[TMP11]]) #[[ATTR1]] +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP12]], i8* [[TMP11]]) #[[ATTR4]] // CHECK3-NEXT: ret void // // @@ -7531,7 +7531,6 @@ // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* @@ -7540,8 +7539,9 @@ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[A_ADDR]], i16* [[CONV]]) #[[ATTR1]] +// CHECK3-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[A_ADDR]], i16* [[CONV]]) #[[ATTR4]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -7754,7 +7754,7 @@ // CHECK3: then: // CHECK3-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* // CHECK3-NEXT: [[TMP48:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func11"(i8* [[TMP47]], i8* [[TMP48]]) #[[ATTR1]] +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func11"(i8* [[TMP47]], i8* [[TMP48]]) #[[ATTR4]] // CHECK3-NEXT: br label [[IFCONT:%.*]] // CHECK3: else: // CHECK3-NEXT: br label [[IFCONT]] @@ -7932,7 +7932,7 @@ // CHECK3: then: // CHECK3-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* // CHECK3-NEXT: [[TMP48:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP47]], i8* [[TMP48]]) #[[ATTR1]] +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP47]], i8* [[TMP48]]) #[[ATTR4]] // CHECK3-NEXT: br label [[IFCONT:%.*]] // CHECK3: else: // CHECK3-NEXT: br label [[IFCONT]] @@ -8099,7 +8099,7 @@ // CHECK3-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 4 // CHECK3-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* // CHECK3-NEXT: [[TMP13:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP12]], i8* [[TMP13]]) #[[ATTR1]] +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP12]], i8* [[TMP13]]) #[[ATTR4]] // CHECK3-NEXT: ret void // // @@ -8159,6 +8159,6 @@ // CHECK3-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 4 // CHECK3-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* // CHECK3-NEXT: [[TMP13:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP13]], i8* [[TMP12]]) #[[ATTR1]] +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP13]], i8* [[TMP12]]) #[[ATTR4]] // CHECK3-NEXT: ret void // diff --git a/clang/test/OpenMP/parallel_if_codegen.cpp b/clang/test/OpenMP/parallel_if_codegen.cpp --- a/clang/test/OpenMP/parallel_if_codegen.cpp +++ b/clang/test/OpenMP/parallel_if_codegen.cpp @@ -80,13 +80,13 @@ // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) // CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..1(i32* [[TMP2]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) // CHECK1-NEXT: ret void @@ -111,13 +111,12 @@ // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR2:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* @Arg, align 4 @@ -129,6 +128,7 @@ // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_1]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4 // CHECK1-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_1]], i32* [[DOTBOUND_ZERO_ADDR2]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] @@ -179,13 +179,12 @@ // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR2:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i32 [[ARG]], i32* [[ARG_ADDR]], align 4 // CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*)) // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARG_ADDR]], align 4 @@ -197,6 +196,7 @@ // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_1]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4 // CHECK1-NEXT: call void @.omp_outlined..7(i32* [[DOTTHREADID_TEMP_1]], i32* [[DOTBOUND_ZERO_ADDR2]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] @@ -250,13 +250,13 @@ // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) // CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..1(i32* [[TMP2]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) // CHECK2-NEXT: ret void @@ -281,13 +281,12 @@ // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR2:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* @Arg, align 4 @@ -299,6 +298,7 @@ // CHECK2: omp_if.else: // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_1]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4 // CHECK2-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_1]], i32* [[DOTBOUND_ZERO_ADDR2]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: br label [[OMP_IF_END]] @@ -349,13 +349,12 @@ // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR2:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i32 [[ARG]], i32* [[ARG_ADDR]], align 4 // CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*)) // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARG_ADDR]], align 4 @@ -367,6 +366,7 @@ // CHECK2: omp_if.else: // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_1]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4 // CHECK2-NEXT: call void @.omp_outlined..7(i32* [[DOTTHREADID_TEMP_1]], i32* [[DOTBOUND_ZERO_ADDR2]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: br label [[OMP_IF_END]] @@ -420,13 +420,13 @@ // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 // CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) // CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: call void @.omp_outlined..1(i32* [[TMP2]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) // CHECK5-NEXT: ret void @@ -451,13 +451,12 @@ // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTTHREADID_TEMP_1:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR2:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK5-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) // CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* @Arg, align 4 @@ -469,6 +468,7 @@ // CHECK5: omp_if.else: // CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_1]], align 4 +// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4 // CHECK5-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_1]], i32* [[DOTBOUND_ZERO_ADDR2]]) #[[ATTR2]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK5-NEXT: br label [[OMP_IF_END]] @@ -519,13 +519,12 @@ // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTTHREADID_TEMP_1:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR2:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK5-NEXT: store i32 [[ARG]], i32* [[ARG_ADDR]], align 4 // CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*)) // CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: call void @.omp_outlined..6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARG_ADDR]], align 4 @@ -537,6 +536,7 @@ // CHECK5: omp_if.else: // CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_1]], align 4 +// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4 // CHECK5-NEXT: call void @.omp_outlined..7(i32* [[DOTTHREADID_TEMP_1]], i32* [[DOTBOUND_ZERO_ADDR2]]) #[[ATTR2]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK5-NEXT: br label [[OMP_IF_END]] @@ -590,13 +590,13 @@ // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK6-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 // CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) // CHECK6-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: call void @.omp_outlined..1(i32* [[TMP2]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] // CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) // CHECK6-NEXT: ret void @@ -621,13 +621,12 @@ // CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTTHREADID_TEMP_1:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR2:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK6-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) // CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2]] // CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* @Arg, align 4 @@ -639,6 +638,7 @@ // CHECK6: omp_if.else: // CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_1]], align 4 +// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4 // CHECK6-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_1]], i32* [[DOTBOUND_ZERO_ADDR2]]) #[[ATTR2]] // CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK6-NEXT: br label [[OMP_IF_END]] @@ -689,13 +689,12 @@ // CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTTHREADID_TEMP_1:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR2:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK6-NEXT: store i32 [[ARG]], i32* [[ARG_ADDR]], align 4 // CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*)) // CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: call void @.omp_outlined..6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2]] // CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARG_ADDR]], align 4 @@ -707,6 +706,7 @@ // CHECK6: omp_if.else: // CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_1]], align 4 +// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4 // CHECK6-NEXT: call void @.omp_outlined..7(i32* [[DOTTHREADID_TEMP_1]], i32* [[DOTBOUND_ZERO_ADDR2]]) #[[ATTR2]] // CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK6-NEXT: br label [[OMP_IF_END]] @@ -760,13 +760,13 @@ // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK7-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK7-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 // CHECK7-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) // CHECK7-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK7-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK7-NEXT: call void @.omp_outlined..1(i32* [[TMP2]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] // CHECK7-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) // CHECK7-NEXT: ret void @@ -791,13 +791,12 @@ // CHECK7-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTTHREADID_TEMP_1:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTBOUND_ZERO_ADDR2:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4 -// CHECK7-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK7-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) // CHECK7-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK7-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK7-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2]] // CHECK7-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* @Arg, align 4 @@ -809,6 +808,7 @@ // CHECK7: omp_if.else: // CHECK7-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_1]], align 4 +// CHECK7-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4 // CHECK7-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_1]], i32* [[DOTBOUND_ZERO_ADDR2]]) #[[ATTR2]] // CHECK7-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK7-NEXT: br label [[OMP_IF_END]] @@ -859,13 +859,12 @@ // CHECK7-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTTHREADID_TEMP_1:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTBOUND_ZERO_ADDR2:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4 -// CHECK7-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK7-NEXT: store i32 [[ARG]], i32* [[ARG_ADDR]], align 4 // CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*)) // CHECK7-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK7-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK7-NEXT: call void @.omp_outlined..6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2]] // CHECK7-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARG_ADDR]], align 4 @@ -877,6 +876,7 @@ // CHECK7: omp_if.else: // CHECK7-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_1]], align 4 +// CHECK7-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4 // CHECK7-NEXT: call void @.omp_outlined..7(i32* [[DOTTHREADID_TEMP_1]], i32* [[DOTBOUND_ZERO_ADDR2]]) #[[ATTR2]] // CHECK7-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK7-NEXT: br label [[OMP_IF_END]] @@ -916,4 +916,3 @@ // CHECK7-NEXT: call void @_Z3fn3v() // CHECK7-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/parallel_if_codegen_PR51349.cpp b/clang/test/OpenMP/parallel_if_codegen_PR51349.cpp --- a/clang/test/OpenMP/parallel_if_codegen_PR51349.cpp +++ b/clang/test/OpenMP/parallel_if_codegen_PR51349.cpp @@ -17,10 +17,10 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA3:![0-9]+]] +// CHECK-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] // CHECK-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK-NEXT: ret void diff --git a/clang/test/OpenMP/parallel_master_taskloop_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_codegen.cpp --- a/clang/test/OpenMP/parallel_master_taskloop_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_taskloop_codegen.cpp @@ -67,7 +67,6 @@ // CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED10:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 @@ -111,6 +110,7 @@ // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] @@ -993,7 +993,6 @@ // CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED10:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK2-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK2-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 @@ -1037,6 +1036,7 @@ // CHECK2: omp_if.else: // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) #[[ATTR2:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: br label [[OMP_IF_END]] diff --git a/clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp --- a/clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp @@ -70,7 +70,6 @@ // CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED10:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 @@ -114,6 +113,7 @@ // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32* [[I]], i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] @@ -915,7 +915,6 @@ // CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED10:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK2-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK2-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 @@ -959,6 +958,7 @@ // CHECK2: omp_if.else: // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32* [[I]], i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) #[[ATTR2:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: br label [[OMP_IF_END]] @@ -1760,7 +1760,6 @@ // CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED10:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK3-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK3-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 @@ -1804,6 +1803,7 @@ // CHECK3: omp_if.else: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32* [[I]], i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) #[[ATTR2:[0-9]+]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_IF_END]] @@ -2660,7 +2660,6 @@ // CHECK4-NEXT: [[DOTCAPTURE_EXPR__CASTED10:%.*]] = alloca i64, align 8 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK4-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK4-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 @@ -2704,6 +2703,7 @@ // CHECK4: omp_if.else: // CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32* [[I]], i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) #[[ATTR2:[0-9]+]] // CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: br label [[OMP_IF_END]] diff --git a/clang/test/OpenMP/target_codegen_global_capture.cpp b/clang/test/OpenMP/target_codegen_global_capture.cpp --- a/clang/test/OpenMP/target_codegen_global_capture.cpp +++ b/clang/test/OpenMP/target_codegen_global_capture.cpp @@ -365,7 +365,6 @@ // CHECK1-NEXT: [[SD_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[GB]], i64* [[GB_ADDR]], align 8 @@ -417,6 +416,7 @@ // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV6]], double* [[CONV7]], float* [[CONV8]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] @@ -678,7 +678,6 @@ // CHECK1-NEXT: [[SD_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[GB]], i64* [[GB_ADDR]], align 8 @@ -730,6 +729,7 @@ // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV6]], double* [[CONV7]], float* [[CONV8]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] @@ -1010,7 +1010,6 @@ // CHECK1-NEXT: [[SD_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[GB]], i64* [[GB_ADDR]], align 8 @@ -1062,6 +1061,7 @@ // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV6]], double* [[CONV7]], float* [[CONV8]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] @@ -1307,7 +1307,6 @@ // CHECK2-NEXT: [[SD_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK2-NEXT: store i64 [[GB]], i64* [[GB_ADDR]], align 8 @@ -1359,6 +1358,7 @@ // CHECK2: omp_if.else: // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV6]], double* [[CONV7]], float* [[CONV8]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: br label [[OMP_IF_END]] @@ -1620,7 +1620,6 @@ // CHECK2-NEXT: [[SD_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK2-NEXT: store i64 [[GB]], i64* [[GB_ADDR]], align 8 @@ -1672,6 +1671,7 @@ // CHECK2: omp_if.else: // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV6]], double* [[CONV7]], float* [[CONV8]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: br label [[OMP_IF_END]] @@ -1952,7 +1952,6 @@ // CHECK2-NEXT: [[SD_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK2-NEXT: store i64 [[GB]], i64* [[GB_ADDR]], align 8 @@ -2004,6 +2003,7 @@ // CHECK2: omp_if.else: // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV6]], double* [[CONV7]], float* [[CONV8]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: br label [[OMP_IF_END]] @@ -2237,7 +2237,6 @@ // CHECK3-NEXT: [[GD8:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK3-NEXT: store double* [[GB]], double** [[GB_ADDR]], align 4 @@ -2295,6 +2294,7 @@ // CHECK3: omp_if.else: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV4]], double* [[GD8]], float* [[CONV5]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_IF_END]] @@ -2544,7 +2544,6 @@ // CHECK3-NEXT: [[GD8:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK3-NEXT: store double* [[GB]], double** [[GB_ADDR]], align 4 @@ -2602,6 +2601,7 @@ // CHECK3: omp_if.else: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV4]], double* [[GD8]], float* [[CONV5]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_IF_END]] @@ -2870,7 +2870,6 @@ // CHECK3-NEXT: [[GD8:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK3-NEXT: store double* [[GB]], double** [[GB_ADDR]], align 4 @@ -2928,6 +2927,7 @@ // CHECK3: omp_if.else: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: call void @.omp_outlined..6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV4]], double* [[GD8]], float* [[CONV5]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_IF_END]] @@ -3161,7 +3161,6 @@ // CHECK4-NEXT: [[GD8:%.*]] = alloca double, align 8 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK4-NEXT: store double* [[GB]], double** [[GB_ADDR]], align 4 @@ -3219,6 +3218,7 @@ // CHECK4: omp_if.else: // CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV4]], double* [[GD8]], float* [[CONV5]]) #[[ATTR2]] // CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: br label [[OMP_IF_END]] @@ -3468,7 +3468,6 @@ // CHECK4-NEXT: [[GD8:%.*]] = alloca double, align 8 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK4-NEXT: store double* [[GB]], double** [[GB_ADDR]], align 4 @@ -3526,6 +3525,7 @@ // CHECK4: omp_if.else: // CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV4]], double* [[GD8]], float* [[CONV5]]) #[[ATTR2]] // CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: br label [[OMP_IF_END]] @@ -3794,7 +3794,6 @@ // CHECK4-NEXT: [[GD8:%.*]] = alloca double, align 8 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK4-NEXT: store double* [[GB]], double** [[GB_ADDR]], align 4 @@ -3852,6 +3851,7 @@ // CHECK4: omp_if.else: // CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: call void @.omp_outlined..6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV4]], double* [[GD8]], float* [[CONV5]]) #[[ATTR2]] // CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: br label [[OMP_IF_END]] @@ -3897,4 +3897,3 @@ // CHECK4-NEXT: call void @__tgt_register_requires(i64 1) // CHECK4-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp --- a/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp @@ -7890,7 +7890,6 @@ // CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) // CHECK5-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK5-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 @@ -7923,6 +7922,7 @@ // CHECK5: omp_if.else: // CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: call void @.omp_outlined..9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP6]], i64 [[TMP2]], i64 [[TMP3]], i16* [[TMP4]], i64 [[TMP8]]) #[[ATTR4]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK5-NEXT: br label [[OMP_IF_END]] @@ -9593,7 +9593,6 @@ // CHECK6-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK6-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) // CHECK6-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK6-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 @@ -9626,6 +9625,7 @@ // CHECK6: omp_if.else: // CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: call void @.omp_outlined..9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP6]], i64 [[TMP2]], i64 [[TMP3]], i16* [[TMP4]], i64 [[TMP8]]) #[[ATTR4]] // CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK6-NEXT: br label [[OMP_IF_END]] @@ -11263,7 +11263,6 @@ // CHECK7-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) // CHECK7-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK7-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 @@ -11294,6 +11293,7 @@ // CHECK7: omp_if.else: // CHECK7-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK7-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK7-NEXT: call void @.omp_outlined..9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP6]], i32 [[TMP2]], i32 [[TMP3]], i16* [[TMP4]], i32 [[TMP8]]) #[[ATTR4]] // CHECK7-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK7-NEXT: br label [[OMP_IF_END]] @@ -12924,7 +12924,6 @@ // CHECK8-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK8-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) // CHECK8-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK8-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 @@ -12955,6 +12954,7 @@ // CHECK8: omp_if.else: // CHECK8-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK8-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK8-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK8-NEXT: call void @.omp_outlined..9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP6]], i32 [[TMP2]], i32 [[TMP3]], i16* [[TMP4]], i32 [[TMP8]]) #[[ATTR4]] // CHECK8-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK8-NEXT: br label [[OMP_IF_END]] @@ -21312,7 +21312,6 @@ // CHECK21-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK21-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK21-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) // CHECK21-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK21-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 @@ -21345,6 +21344,7 @@ // CHECK21: omp_if.else: // CHECK21-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK21-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK21-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK21-NEXT: call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP6]], i64 [[TMP2]], i64 [[TMP3]], i16* [[TMP4]], i64 [[TMP8]]) #[[ATTR2:[0-9]+]] // CHECK21-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK21-NEXT: br label [[OMP_IF_END]] @@ -22240,7 +22240,6 @@ // CHECK22-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK22-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK22-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK22-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK22-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) // CHECK22-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK22-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 @@ -22273,6 +22272,7 @@ // CHECK22: omp_if.else: // CHECK22-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK22-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK22-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK22-NEXT: call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP6]], i64 [[TMP2]], i64 [[TMP3]], i16* [[TMP4]], i64 [[TMP8]]) #[[ATTR2:[0-9]+]] // CHECK22-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK22-NEXT: br label [[OMP_IF_END]] @@ -23150,7 +23150,6 @@ // CHECK23-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK23-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) // CHECK23-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK23-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 @@ -23181,6 +23180,7 @@ // CHECK23: omp_if.else: // CHECK23-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK23-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK23-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK23-NEXT: call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP6]], i32 [[TMP2]], i32 [[TMP3]], i16* [[TMP4]], i32 [[TMP8]]) #[[ATTR2:[0-9]+]] // CHECK23-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK23-NEXT: br label [[OMP_IF_END]] @@ -24054,7 +24054,6 @@ // CHECK24-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 // CHECK24-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK24-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK24-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK24-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) // CHECK24-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK24-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 @@ -24085,6 +24084,7 @@ // CHECK24: omp_if.else: // CHECK24-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK24-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK24-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK24-NEXT: call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP6]], i32 [[TMP2]], i32 [[TMP3]], i16* [[TMP4]], i32 [[TMP8]]) #[[ATTR2:[0-9]+]] // CHECK24-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK24-NEXT: br label [[OMP_IF_END]] diff --git a/clang/test/OpenMP/target_parallel_if_codegen.cpp b/clang/test/OpenMP/target_parallel_if_codegen.cpp --- a/clang/test/OpenMP/target_parallel_if_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_if_codegen.cpp @@ -491,7 +491,6 @@ // CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 @@ -512,6 +511,7 @@ // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP3]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] @@ -547,7 +547,6 @@ // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -562,6 +561,7 @@ // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] @@ -590,7 +590,6 @@ // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* @@ -603,6 +602,7 @@ // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] @@ -644,7 +644,6 @@ // CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* @@ -654,6 +653,7 @@ // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP2]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: ret void @@ -1035,7 +1035,6 @@ // CHECK2-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK2-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 @@ -1056,6 +1055,7 @@ // CHECK2: omp_if.else: // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP3]]) #[[ATTR3]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: br label [[OMP_IF_END]] @@ -1091,7 +1091,6 @@ // CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -1106,6 +1105,7 @@ // CHECK2: omp_if.else: // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR3]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: br label [[OMP_IF_END]] @@ -1134,7 +1134,6 @@ // CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* @@ -1147,6 +1146,7 @@ // CHECK2: omp_if.else: // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR3]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: br label [[OMP_IF_END]] @@ -1188,7 +1188,6 @@ // CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* @@ -1198,6 +1197,7 @@ // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP2]]) #[[ATTR3]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: ret void @@ -1576,7 +1576,6 @@ // CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 @@ -1595,6 +1594,7 @@ // CHECK3: omp_if.else: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP3]]) #[[ATTR3]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_IF_END]] @@ -1629,7 +1629,6 @@ // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -1644,6 +1643,7 @@ // CHECK3: omp_if.else: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: call void @.omp_outlined..1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR3]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_IF_END]] @@ -1672,7 +1672,6 @@ // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* @@ -1685,6 +1684,7 @@ // CHECK3: omp_if.else: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR3]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_IF_END]] @@ -1726,7 +1726,6 @@ // CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 @@ -1734,6 +1733,7 @@ // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: call void @.omp_outlined..8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32 [[TMP2]]) #[[ATTR3]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: ret void @@ -2108,7 +2108,6 @@ // CHECK4-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK4-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 @@ -2127,6 +2126,7 @@ // CHECK4: omp_if.else: // CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP3]]) #[[ATTR3]] // CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: br label [[OMP_IF_END]] @@ -2161,7 +2161,6 @@ // CHECK4-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK4-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -2176,6 +2175,7 @@ // CHECK4: omp_if.else: // CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: call void @.omp_outlined..1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR3]] // CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: br label [[OMP_IF_END]] @@ -2204,7 +2204,6 @@ // CHECK4-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* @@ -2217,6 +2216,7 @@ // CHECK4: omp_if.else: // CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR3]] // CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: br label [[OMP_IF_END]] @@ -2258,7 +2258,6 @@ // CHECK4-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 @@ -2266,6 +2265,7 @@ // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 // CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: call void @.omp_outlined..8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32 [[TMP2]]) #[[ATTR3]] // CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: ret void @@ -2340,7 +2340,6 @@ // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* @@ -2353,6 +2352,7 @@ // CHECK9: omp_if.else: // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: br label [[OMP_IF_END]] @@ -2396,7 +2396,6 @@ // CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 @@ -2417,6 +2416,7 @@ // CHECK9: omp_if.else: // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP3]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: br label [[OMP_IF_END]] @@ -2452,7 +2452,6 @@ // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -2467,6 +2466,7 @@ // CHECK9: omp_if.else: // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: br label [[OMP_IF_END]] @@ -2496,7 +2496,6 @@ // CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* @@ -2506,6 +2505,7 @@ // CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP2]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: ret void @@ -2577,7 +2577,6 @@ // CHECK10-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK10-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* @@ -2590,6 +2589,7 @@ // CHECK10: omp_if.else: // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK10-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK10-NEXT: br label [[OMP_IF_END]] @@ -2633,7 +2633,6 @@ // CHECK10-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 // CHECK10-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK10-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK10-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 @@ -2654,6 +2653,7 @@ // CHECK10: omp_if.else: // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK10-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP3]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK10-NEXT: br label [[OMP_IF_END]] @@ -2689,7 +2689,6 @@ // CHECK10-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK10-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK10-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -2704,6 +2703,7 @@ // CHECK10: omp_if.else: // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK10-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK10-NEXT: br label [[OMP_IF_END]] @@ -2733,7 +2733,6 @@ // CHECK10-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK10-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK10-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* @@ -2743,6 +2742,7 @@ // CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK10-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP2]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK10-NEXT: ret void @@ -2814,7 +2814,6 @@ // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* @@ -2827,6 +2826,7 @@ // CHECK11: omp_if.else: // CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: br label [[OMP_IF_END]] @@ -2870,7 +2870,6 @@ // CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK11-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 @@ -2889,6 +2888,7 @@ // CHECK11: omp_if.else: // CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP3]]) #[[ATTR2]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: br label [[OMP_IF_END]] @@ -2923,7 +2923,6 @@ // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK11-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -2938,6 +2937,7 @@ // CHECK11: omp_if.else: // CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR2]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: br label [[OMP_IF_END]] @@ -2967,7 +2967,6 @@ // CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 @@ -2975,6 +2974,7 @@ // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 // CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32 [[TMP2]]) #[[ATTR2]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: ret void @@ -3042,7 +3042,6 @@ // CHECK12-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK12-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* @@ -3055,6 +3054,7 @@ // CHECK12: omp_if.else: // CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK12-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] // CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK12-NEXT: br label [[OMP_IF_END]] @@ -3098,7 +3098,6 @@ // CHECK12-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK12-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK12-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 @@ -3117,6 +3116,7 @@ // CHECK12: omp_if.else: // CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK12-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP3]]) #[[ATTR2]] // CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK12-NEXT: br label [[OMP_IF_END]] @@ -3151,7 +3151,6 @@ // CHECK12-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK12-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK12-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -3166,6 +3165,7 @@ // CHECK12: omp_if.else: // CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK12-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR2]] // CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK12-NEXT: br label [[OMP_IF_END]] @@ -3195,7 +3195,6 @@ // CHECK12-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK12-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 @@ -3203,6 +3202,7 @@ // CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 // CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK12-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32 [[TMP2]]) #[[ATTR2]] // CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK12-NEXT: ret void @@ -3573,7 +3573,6 @@ // CHECK17-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK17-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK17-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK17-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 @@ -3594,6 +3593,7 @@ // CHECK17: omp_if.else: // CHECK17-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK17-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK17-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK17-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP3]]) #[[ATTR3]] // CHECK17-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK17-NEXT: br label [[OMP_IF_END]] @@ -3629,7 +3629,6 @@ // CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK17-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK17-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -3644,6 +3643,7 @@ // CHECK17: omp_if.else: // CHECK17-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK17-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK17-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK17-NEXT: call void @.omp_outlined..1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR3]] // CHECK17-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK17-NEXT: br label [[OMP_IF_END]] @@ -3672,7 +3672,6 @@ // CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK17-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* @@ -3685,6 +3684,7 @@ // CHECK17: omp_if.else: // CHECK17-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK17-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK17-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK17-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR3]] // CHECK17-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK17-NEXT: br label [[OMP_IF_END]] @@ -3726,7 +3726,6 @@ // CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK17-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* @@ -3736,6 +3735,7 @@ // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 // CHECK17-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK17-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK17-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK17-NEXT: call void @.omp_outlined..8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP2]]) #[[ATTR3]] // CHECK17-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK17-NEXT: ret void @@ -4117,7 +4117,6 @@ // CHECK18-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 // CHECK18-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK18-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK18-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK18-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK18-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK18-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 @@ -4138,6 +4137,7 @@ // CHECK18: omp_if.else: // CHECK18-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK18-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK18-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK18-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP3]]) #[[ATTR3]] // CHECK18-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK18-NEXT: br label [[OMP_IF_END]] @@ -4173,7 +4173,6 @@ // CHECK18-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK18-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK18-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK18-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK18-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK18-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK18-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -4188,6 +4187,7 @@ // CHECK18: omp_if.else: // CHECK18-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK18-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK18-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK18-NEXT: call void @.omp_outlined..1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR3]] // CHECK18-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK18-NEXT: br label [[OMP_IF_END]] @@ -4216,7 +4216,6 @@ // CHECK18-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK18-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK18-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK18-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK18-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK18-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* @@ -4229,6 +4228,7 @@ // CHECK18: omp_if.else: // CHECK18-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK18-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK18-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK18-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR3]] // CHECK18-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK18-NEXT: br label [[OMP_IF_END]] @@ -4270,7 +4270,6 @@ // CHECK18-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK18-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK18-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK18-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK18-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK18-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* @@ -4280,6 +4279,7 @@ // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 // CHECK18-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK18-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK18-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK18-NEXT: call void @.omp_outlined..8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP2]]) #[[ATTR3]] // CHECK18-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK18-NEXT: ret void @@ -4658,7 +4658,6 @@ // CHECK19-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK19-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK19-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 @@ -4677,6 +4676,7 @@ // CHECK19: omp_if.else: // CHECK19-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK19-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK19-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK19-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP3]]) #[[ATTR3]] // CHECK19-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK19-NEXT: br label [[OMP_IF_END]] @@ -4711,7 +4711,6 @@ // CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK19-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -4726,6 +4725,7 @@ // CHECK19: omp_if.else: // CHECK19-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK19-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK19-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK19-NEXT: call void @.omp_outlined..1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR3]] // CHECK19-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK19-NEXT: br label [[OMP_IF_END]] @@ -4754,7 +4754,6 @@ // CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* @@ -4767,6 +4766,7 @@ // CHECK19: omp_if.else: // CHECK19-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK19-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK19-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK19-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR3]] // CHECK19-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK19-NEXT: br label [[OMP_IF_END]] @@ -4808,7 +4808,6 @@ // CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 @@ -4816,6 +4815,7 @@ // CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 // CHECK19-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK19-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK19-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK19-NEXT: call void @.omp_outlined..8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32 [[TMP2]]) #[[ATTR3]] // CHECK19-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK19-NEXT: ret void @@ -5190,7 +5190,6 @@ // CHECK20-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 // CHECK20-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK20-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK20-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK20-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK20-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK20-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 @@ -5209,6 +5208,7 @@ // CHECK20: omp_if.else: // CHECK20-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK20-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK20-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK20-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP3]]) #[[ATTR3]] // CHECK20-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK20-NEXT: br label [[OMP_IF_END]] @@ -5243,7 +5243,6 @@ // CHECK20-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK20-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK20-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK20-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK20-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK20-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK20-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -5258,6 +5257,7 @@ // CHECK20: omp_if.else: // CHECK20-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK20-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK20-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK20-NEXT: call void @.omp_outlined..1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR3]] // CHECK20-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK20-NEXT: br label [[OMP_IF_END]] @@ -5286,7 +5286,6 @@ // CHECK20-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK20-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK20-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK20-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK20-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK20-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK20-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* @@ -5299,6 +5298,7 @@ // CHECK20: omp_if.else: // CHECK20-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK20-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK20-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK20-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR3]] // CHECK20-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK20-NEXT: br label [[OMP_IF_END]] @@ -5340,7 +5340,6 @@ // CHECK20-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK20-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK20-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK20-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK20-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK20-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 @@ -5348,6 +5347,7 @@ // CHECK20-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 // CHECK20-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK20-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK20-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK20-NEXT: call void @.omp_outlined..8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32 [[TMP2]]) #[[ATTR3]] // CHECK20-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK20-NEXT: ret void @@ -5422,7 +5422,6 @@ // CHECK25-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK25-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK25-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* @@ -5435,6 +5434,7 @@ // CHECK25: omp_if.else: // CHECK25-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK25-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK25-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK25-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] // CHECK25-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK25-NEXT: br label [[OMP_IF_END]] @@ -5478,7 +5478,6 @@ // CHECK25-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK25-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK25-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK25-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 @@ -5499,6 +5498,7 @@ // CHECK25: omp_if.else: // CHECK25-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK25-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK25-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK25-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP3]]) #[[ATTR2]] // CHECK25-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK25-NEXT: br label [[OMP_IF_END]] @@ -5534,7 +5534,6 @@ // CHECK25-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK25-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK25-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK25-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -5549,6 +5548,7 @@ // CHECK25: omp_if.else: // CHECK25-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK25-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK25-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK25-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR2]] // CHECK25-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK25-NEXT: br label [[OMP_IF_END]] @@ -5578,7 +5578,6 @@ // CHECK25-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK25-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK25-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* @@ -5588,6 +5587,7 @@ // CHECK25-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 // CHECK25-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK25-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK25-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK25-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP2]]) #[[ATTR2]] // CHECK25-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK25-NEXT: ret void @@ -5659,7 +5659,6 @@ // CHECK26-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK26-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK26-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK26-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK26-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK26-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* @@ -5672,6 +5671,7 @@ // CHECK26: omp_if.else: // CHECK26-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK26-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK26-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK26-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] // CHECK26-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK26-NEXT: br label [[OMP_IF_END]] @@ -5715,7 +5715,6 @@ // CHECK26-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 // CHECK26-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK26-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK26-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK26-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK26-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK26-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 @@ -5736,6 +5735,7 @@ // CHECK26: omp_if.else: // CHECK26-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK26-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK26-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK26-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP3]]) #[[ATTR2]] // CHECK26-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK26-NEXT: br label [[OMP_IF_END]] @@ -5771,7 +5771,6 @@ // CHECK26-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK26-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK26-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK26-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK26-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK26-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK26-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -5786,6 +5785,7 @@ // CHECK26: omp_if.else: // CHECK26-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK26-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK26-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK26-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR2]] // CHECK26-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK26-NEXT: br label [[OMP_IF_END]] @@ -5815,7 +5815,6 @@ // CHECK26-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK26-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK26-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK26-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK26-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK26-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* @@ -5825,6 +5824,7 @@ // CHECK26-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 // CHECK26-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK26-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK26-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK26-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP2]]) #[[ATTR2]] // CHECK26-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK26-NEXT: ret void @@ -5896,7 +5896,6 @@ // CHECK27-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK27-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK27-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK27-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* @@ -5909,6 +5908,7 @@ // CHECK27: omp_if.else: // CHECK27-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK27-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK27-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK27-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] // CHECK27-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK27-NEXT: br label [[OMP_IF_END]] @@ -5952,7 +5952,6 @@ // CHECK27-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK27-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK27-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK27-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 @@ -5971,6 +5970,7 @@ // CHECK27: omp_if.else: // CHECK27-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK27-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK27-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK27-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP3]]) #[[ATTR2]] // CHECK27-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK27-NEXT: br label [[OMP_IF_END]] @@ -6005,7 +6005,6 @@ // CHECK27-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK27-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK27-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK27-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -6020,6 +6019,7 @@ // CHECK27: omp_if.else: // CHECK27-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK27-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK27-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK27-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR2]] // CHECK27-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK27-NEXT: br label [[OMP_IF_END]] @@ -6049,7 +6049,6 @@ // CHECK27-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK27-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK27-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 @@ -6057,6 +6056,7 @@ // CHECK27-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 // CHECK27-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK27-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK27-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK27-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32 [[TMP2]]) #[[ATTR2]] // CHECK27-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK27-NEXT: ret void @@ -6124,7 +6124,6 @@ // CHECK28-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK28-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK28-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK28-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK28-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK28-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK28-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* @@ -6137,6 +6136,7 @@ // CHECK28: omp_if.else: // CHECK28-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK28-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK28-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK28-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] // CHECK28-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK28-NEXT: br label [[OMP_IF_END]] @@ -6180,7 +6180,6 @@ // CHECK28-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 // CHECK28-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK28-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK28-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK28-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK28-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK28-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 @@ -6199,6 +6198,7 @@ // CHECK28: omp_if.else: // CHECK28-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK28-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK28-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK28-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP3]]) #[[ATTR2]] // CHECK28-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK28-NEXT: br label [[OMP_IF_END]] @@ -6233,7 +6233,6 @@ // CHECK28-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK28-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK28-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK28-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK28-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK28-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK28-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -6248,6 +6247,7 @@ // CHECK28: omp_if.else: // CHECK28-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK28-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK28-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK28-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR2]] // CHECK28-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK28-NEXT: br label [[OMP_IF_END]] @@ -6277,7 +6277,6 @@ // CHECK28-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK28-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK28-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK28-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK28-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK28-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 @@ -6285,6 +6284,7 @@ // CHECK28-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 // CHECK28-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK28-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK28-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK28-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32 [[TMP2]]) #[[ATTR2]] // CHECK28-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK28-NEXT: ret void diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp @@ -286,7 +286,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -322,6 +321,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -633,7 +633,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -669,6 +668,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -787,7 +787,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -832,6 +831,7 @@ // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: br label [[OMP_IF_END]] @@ -1134,7 +1134,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -1170,6 +1169,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1288,7 +1288,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -1333,6 +1332,7 @@ // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: br label [[OMP_IF_END]] @@ -1610,7 +1610,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -1646,6 +1645,7 @@ // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1957,7 +1957,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -1993,6 +1992,7 @@ // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2111,7 +2111,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -2156,6 +2155,7 @@ // CHECK2: omp_if.else: // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: br label [[OMP_IF_END]] @@ -2458,7 +2458,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -2494,6 +2493,7 @@ // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2612,7 +2612,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -2657,6 +2656,7 @@ // CHECK2: omp_if.else: // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: br label [[OMP_IF_END]] @@ -2934,7 +2934,6 @@ // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -2970,6 +2969,7 @@ // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -3281,7 +3281,6 @@ // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -3317,6 +3316,7 @@ // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -3435,7 +3435,6 @@ // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -3480,6 +3479,7 @@ // CHECK5: omp_if.else: // CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: br label [[OMP_IF_END]] @@ -3782,7 +3782,6 @@ // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -3818,6 +3817,7 @@ // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -3936,7 +3936,6 @@ // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -3981,6 +3980,7 @@ // CHECK5: omp_if.else: // CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: br label [[OMP_IF_END]] @@ -4258,7 +4258,6 @@ // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -4294,6 +4293,7 @@ // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -4605,7 +4605,6 @@ // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -4641,6 +4640,7 @@ // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -4759,7 +4759,6 @@ // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK6-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -4804,6 +4803,7 @@ // CHECK6: omp_if.else: // CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: br label [[OMP_IF_END]] @@ -5106,7 +5106,6 @@ // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -5142,6 +5141,7 @@ // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -5260,7 +5260,6 @@ // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK6-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -5305,6 +5304,7 @@ // CHECK6: omp_if.else: // CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: br label [[OMP_IF_END]] @@ -5582,7 +5582,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -5618,6 +5617,7 @@ // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -5929,7 +5929,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -5965,6 +5964,7 @@ // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -6083,7 +6083,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -6128,6 +6127,7 @@ // CHECK9: omp_if.else: // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: br label [[OMP_IF_END]] @@ -6430,7 +6430,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -6466,6 +6465,7 @@ // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -6584,7 +6584,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -6629,6 +6628,7 @@ // CHECK9: omp_if.else: // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: br label [[OMP_IF_END]] @@ -6906,7 +6906,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -6942,6 +6941,7 @@ // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -7253,7 +7253,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -7289,6 +7288,7 @@ // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -7407,7 +7407,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -7452,6 +7451,7 @@ // CHECK10: omp_if.else: // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: br label [[OMP_IF_END]] @@ -7754,7 +7754,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -7790,6 +7789,7 @@ // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -7908,7 +7908,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -7953,6 +7952,7 @@ // CHECK10: omp_if.else: // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: br label [[OMP_IF_END]] @@ -8230,7 +8230,6 @@ // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -8266,6 +8265,7 @@ // CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -8577,7 +8577,6 @@ // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -8613,6 +8612,7 @@ // CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -8731,7 +8731,6 @@ // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -8776,6 +8775,7 @@ // CHECK13: omp_if.else: // CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: br label [[OMP_IF_END]] @@ -9078,7 +9078,6 @@ // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -9114,6 +9113,7 @@ // CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -9232,7 +9232,6 @@ // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -9277,6 +9276,7 @@ // CHECK13: omp_if.else: // CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: br label [[OMP_IF_END]] @@ -9554,7 +9554,6 @@ // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -9590,6 +9589,7 @@ // CHECK14-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -9901,7 +9901,6 @@ // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -9937,6 +9936,7 @@ // CHECK14-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -10055,7 +10055,6 @@ // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -10100,6 +10099,7 @@ // CHECK14: omp_if.else: // CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: br label [[OMP_IF_END]] @@ -10402,7 +10402,6 @@ // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -10438,6 +10437,7 @@ // CHECK14-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -10556,7 +10556,6 @@ // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -10601,6 +10600,7 @@ // CHECK14: omp_if.else: // CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: br label [[OMP_IF_END]] @@ -10696,4 +10696,3 @@ // CHECK14-NEXT: call void @__tgt_register_requires(i64 1) // CHECK14-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp @@ -341,7 +341,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -377,6 +376,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !18 // CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !18 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !18 // CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !18 // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !18 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -716,7 +716,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -752,6 +751,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !30 // CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !30 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !30 // CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !30 // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !30 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -884,7 +884,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -929,6 +928,7 @@ // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 // CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !36 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !36 // CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !36 // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 // CHECK1-NEXT: br label [[OMP_IF_END]] @@ -1259,7 +1259,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -1295,6 +1294,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !48 // CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !48 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !48 // CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !48 // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !48 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1427,7 +1427,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -1472,6 +1471,7 @@ // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 // CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !54 // CHECK1-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !54 // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 // CHECK1-NEXT: br label [[OMP_IF_END]] @@ -1815,7 +1815,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -1851,6 +1850,7 @@ // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !18 // CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !18 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !18 // CHECK2-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !18 // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !18 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2190,7 +2190,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -2226,6 +2225,7 @@ // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !30 // CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !30 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !30 // CHECK2-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !30 // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !30 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2358,7 +2358,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -2403,6 +2402,7 @@ // CHECK2: omp_if.else: // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 // CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !36 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !36 // CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !36 // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 // CHECK2-NEXT: br label [[OMP_IF_END]] @@ -2733,7 +2733,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -2769,6 +2768,7 @@ // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !48 // CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !48 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !48 // CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !48 // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !48 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2901,7 +2901,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -2946,6 +2945,7 @@ // CHECK2: omp_if.else: // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 // CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !54 // CHECK2-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !54 // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 // CHECK2-NEXT: br label [[OMP_IF_END]] @@ -3289,7 +3289,6 @@ // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -3325,6 +3324,7 @@ // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !19 // CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !19 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !19 // CHECK3-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !19 // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !19 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -3664,7 +3664,6 @@ // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -3700,6 +3699,7 @@ // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -3835,8 +3835,6 @@ // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED11:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -3891,6 +3889,7 @@ // CHECK3: omp_if.else: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 // CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !34 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !34 // CHECK3-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group !34 // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 // CHECK3-NEXT: br label [[OMP_IF_END]] @@ -3931,6 +3930,7 @@ // CHECK3: omp_if.else16: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK3-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4 // CHECK3-NEXT: call void @.omp_outlined..10(i32* [[TMP27]], i32* [[DOTBOUND_ZERO_ADDR17]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK3-NEXT: br label [[OMP_IF_END18]] @@ -4441,7 +4441,6 @@ // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -4477,6 +4476,7 @@ // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: call void @.omp_outlined..16(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -4609,7 +4609,6 @@ // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -4654,6 +4653,7 @@ // CHECK3: omp_if.else: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 // CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !54 // CHECK3-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !54 // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 // CHECK3-NEXT: br label [[OMP_IF_END]] @@ -4997,7 +4997,6 @@ // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -5033,6 +5032,7 @@ // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !19 // CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !19 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !19 // CHECK4-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !19 // CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !19 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -5372,7 +5372,6 @@ // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -5408,6 +5407,7 @@ // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -5543,8 +5543,6 @@ // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTCAPTURE_EXPR__CASTED11:%.*]] = alloca i64, align 8 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK4-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -5599,6 +5597,7 @@ // CHECK4: omp_if.else: // CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 // CHECK4-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !34 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !34 // CHECK4-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group !34 // CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 // CHECK4-NEXT: br label [[OMP_IF_END]] @@ -5639,6 +5638,7 @@ // CHECK4: omp_if.else16: // CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK4-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4 // CHECK4-NEXT: call void @.omp_outlined..10(i32* [[TMP27]], i32* [[DOTBOUND_ZERO_ADDR17]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) #[[ATTR2]] // CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK4-NEXT: br label [[OMP_IF_END18]] @@ -6149,7 +6149,6 @@ // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -6185,6 +6184,7 @@ // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: call void @.omp_outlined..16(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -6317,7 +6317,6 @@ // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK4-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -6362,6 +6361,7 @@ // CHECK4: omp_if.else: // CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 // CHECK4-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !54 // CHECK4-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !54 // CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 // CHECK4-NEXT: br label [[OMP_IF_END]] @@ -7891,7 +7891,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -7927,6 +7926,7 @@ // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !22 // CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !22 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !22 // CHECK9-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !22 // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !22 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -8266,7 +8266,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -8302,6 +8301,7 @@ // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 // CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !34 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !34 // CHECK9-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !34 // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -8434,7 +8434,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -8479,6 +8478,7 @@ // CHECK9: omp_if.else: // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !40 // CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !40 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !40 // CHECK9-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !40 // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !40 // CHECK9-NEXT: br label [[OMP_IF_END]] @@ -8809,7 +8809,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -8845,6 +8844,7 @@ // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !52 // CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !52 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !52 // CHECK9-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !52 // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !52 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -8977,7 +8977,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -9022,6 +9021,7 @@ // CHECK9: omp_if.else: // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 // CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !58 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !58 // CHECK9-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !58 // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 // CHECK9-NEXT: br label [[OMP_IF_END]] @@ -9365,7 +9365,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -9401,6 +9400,7 @@ // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !22 // CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !22 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !22 // CHECK10-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !22 // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !22 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -9740,7 +9740,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -9776,6 +9775,7 @@ // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 // CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !34 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !34 // CHECK10-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !34 // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -9908,7 +9908,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -9953,6 +9952,7 @@ // CHECK10: omp_if.else: // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !40 // CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !40 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !40 // CHECK10-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !40 // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !40 // CHECK10-NEXT: br label [[OMP_IF_END]] @@ -10283,7 +10283,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -10319,6 +10318,7 @@ // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !52 // CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !52 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !52 // CHECK10-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !52 // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !52 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -10451,7 +10451,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -10496,6 +10495,7 @@ // CHECK10: omp_if.else: // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 // CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !58 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !58 // CHECK10-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !58 // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 // CHECK10-NEXT: br label [[OMP_IF_END]] @@ -10839,7 +10839,6 @@ // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -10875,6 +10874,7 @@ // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !23 // CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !23 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !23 // CHECK11-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !23 // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !23 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -11214,7 +11214,6 @@ // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -11250,6 +11249,7 @@ // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -11385,8 +11385,6 @@ // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED11:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -11441,6 +11439,7 @@ // CHECK11: omp_if.else: // CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !38 // CHECK11-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !38 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !38 // CHECK11-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group !38 // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !38 // CHECK11-NEXT: br label [[OMP_IF_END]] @@ -11481,6 +11480,7 @@ // CHECK11: omp_if.else16: // CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK11-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4 // CHECK11-NEXT: call void @.omp_outlined..10(i32* [[TMP27]], i32* [[DOTBOUND_ZERO_ADDR17]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) #[[ATTR2]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK11-NEXT: br label [[OMP_IF_END18]] @@ -11991,7 +11991,6 @@ // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -12027,6 +12026,7 @@ // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: call void @.omp_outlined..16(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -12159,7 +12159,6 @@ // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -12204,6 +12203,7 @@ // CHECK11: omp_if.else: // CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 // CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !58 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !58 // CHECK11-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !58 // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 // CHECK11-NEXT: br label [[OMP_IF_END]] @@ -12547,7 +12547,6 @@ // CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -12583,6 +12582,7 @@ // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !23 // CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !23 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !23 // CHECK12-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !23 // CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !23 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -12922,7 +12922,6 @@ // CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -12958,6 +12957,7 @@ // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -13093,8 +13093,6 @@ // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTCAPTURE_EXPR__CASTED11:%.*]] = alloca i64, align 8 // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK12-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -13149,6 +13147,7 @@ // CHECK12: omp_if.else: // CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !38 // CHECK12-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !38 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !38 // CHECK12-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group !38 // CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !38 // CHECK12-NEXT: br label [[OMP_IF_END]] @@ -13189,6 +13188,7 @@ // CHECK12: omp_if.else16: // CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK12-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4 // CHECK12-NEXT: call void @.omp_outlined..10(i32* [[TMP27]], i32* [[DOTBOUND_ZERO_ADDR17]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) #[[ATTR2]] // CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK12-NEXT: br label [[OMP_IF_END18]] @@ -13699,7 +13699,6 @@ // CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -13735,6 +13734,7 @@ // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: call void @.omp_outlined..16(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -13867,7 +13867,6 @@ // CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK12-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -13912,6 +13911,7 @@ // CHECK12: omp_if.else: // CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 // CHECK12-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !58 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !58 // CHECK12-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !58 // CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 // CHECK12-NEXT: br label [[OMP_IF_END]] diff --git a/clang/test/OpenMP/task_codegen.c b/clang/test/OpenMP/task_codegen.c --- a/clang/test/OpenMP/task_codegen.c +++ b/clang/test/OpenMP/task_codegen.c @@ -28,8 +28,6 @@ // CHECK: [[DEPOBJ_SIZE_ADDR1:%.+]] = alloca i64, // CHECK: = alloca i64, // CHECK: [[DEP_COUNTER_ADDR:%.+]] = alloca i64, - // CHECK-DAG: store i64 0, i64* [[DEPOBJ_SIZE_ADDR1]], - // CHECK-DAG: store i64 0, i64* [[DEPOBJ_SIZE_ADDR]], // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num( // CHECK: [[ALLOC:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 [[GTID]], i32 65, i64 48, i64 0, i32 (i32, i8*)* bitcast (i32 (i32, [[PRIVATES_TY:%.+]]*)* [[TASK_ENTRY:@.+]] to i32 (i32, i8*)*)) // CHECK: [[EVT_VAL:%.+]] = call i8* @__kmpc_task_allow_completion_event(%struct.ident_t* @{{.+}}, i32 [[GTID]], i8* [[ALLOC]]) @@ -41,6 +39,7 @@ // CHECK: [[D_DEP_BASE:%.+]] = getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[D_DEP]], i{{.+}} -1 // CHECK: [[D_DEP_BASE_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[D_DEP_BASE]], i{{.+}} 0, i{{.+}} 0 // CHECK: [[SIZE1:%.+]] = load i64, i64* [[D_DEP_BASE_SIZE]], + // CHECK-DAG: store i64 0, i64* [[DEPOBJ_SIZE_ADDR]], // CHECK: [[SZ:%.+]] = load i64, i64* [[DEPOBJ_SIZE_ADDR]], // CHECK: [[SIZE:%.+]] = add nuw i64 [[SZ]], [[SIZE1]] // CHECK: store i64 [[SIZE]], i64* [[DEPOBJ_SIZE_ADDR]], @@ -49,6 +48,7 @@ // CHECK: [[X_DEP_BASE:%.+]] = getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[X_DEP]], i{{.+}} -1 // CHECK: [[X_DEP_BASE_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[X_DEP_BASE]], i{{.+}} 0, i{{.+}} 0 // CHECK: [[SIZE2:%.+]] = load i64, i64* [[X_DEP_BASE_SIZE]], + // CHECK-DAG: store i64 0, i64* [[DEPOBJ_SIZE_ADDR1]], // CHECK: [[SZ:%.+]] = load i64, i64* [[DEPOBJ_SIZE_ADDR1]], // CHECK: [[SIZE3:%.+]] = add nuw i64 [[SZ]], [[SIZE2]] // CHECK: store i64 [[SIZE3]], i64* [[DEPOBJ_SIZE_ADDR1]], diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp @@ -294,7 +294,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -330,6 +329,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -638,7 +638,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -674,6 +673,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -797,7 +797,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -842,6 +841,7 @@ // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: br label [[OMP_IF_END]] @@ -1150,7 +1150,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -1186,6 +1185,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1309,7 +1309,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -1354,6 +1353,7 @@ // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: br label [[OMP_IF_END]] @@ -1631,7 +1631,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -1667,6 +1666,7 @@ // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1975,7 +1975,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -2011,6 +2010,7 @@ // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2134,7 +2134,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -2179,6 +2178,7 @@ // CHECK2: omp_if.else: // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: br label [[OMP_IF_END]] @@ -2487,7 +2487,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -2523,6 +2522,7 @@ // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2646,7 +2646,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -2691,6 +2690,7 @@ // CHECK2: omp_if.else: // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: br label [[OMP_IF_END]] @@ -2968,7 +2968,6 @@ // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -3004,6 +3003,7 @@ // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -3312,7 +3312,6 @@ // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -3348,6 +3347,7 @@ // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -3471,7 +3471,6 @@ // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -3516,6 +3515,7 @@ // CHECK5: omp_if.else: // CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: br label [[OMP_IF_END]] @@ -3824,7 +3824,6 @@ // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -3860,6 +3859,7 @@ // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -3983,7 +3983,6 @@ // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -4028,6 +4027,7 @@ // CHECK5: omp_if.else: // CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: br label [[OMP_IF_END]] @@ -4305,7 +4305,6 @@ // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -4341,6 +4340,7 @@ // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -4649,7 +4649,6 @@ // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -4685,6 +4684,7 @@ // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -4808,7 +4808,6 @@ // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK6-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -4853,6 +4852,7 @@ // CHECK6: omp_if.else: // CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: br label [[OMP_IF_END]] @@ -5161,7 +5161,6 @@ // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -5197,6 +5196,7 @@ // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -5320,7 +5320,6 @@ // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK6-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -5365,6 +5364,7 @@ // CHECK6: omp_if.else: // CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: br label [[OMP_IF_END]] @@ -5642,7 +5642,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -5678,6 +5677,7 @@ // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -5986,7 +5986,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -6022,6 +6021,7 @@ // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -6145,7 +6145,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -6190,6 +6189,7 @@ // CHECK9: omp_if.else: // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: br label [[OMP_IF_END]] @@ -6498,7 +6498,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -6534,6 +6533,7 @@ // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -6657,7 +6657,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -6702,6 +6701,7 @@ // CHECK9: omp_if.else: // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: br label [[OMP_IF_END]] @@ -6979,7 +6979,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -7015,6 +7014,7 @@ // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -7323,7 +7323,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -7359,6 +7358,7 @@ // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -7482,7 +7482,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -7527,6 +7526,7 @@ // CHECK10: omp_if.else: // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: br label [[OMP_IF_END]] @@ -7835,7 +7835,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -7871,6 +7870,7 @@ // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -7994,7 +7994,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -8039,6 +8038,7 @@ // CHECK10: omp_if.else: // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: br label [[OMP_IF_END]] @@ -8316,7 +8316,6 @@ // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -8352,6 +8351,7 @@ // CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -8660,7 +8660,6 @@ // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -8696,6 +8695,7 @@ // CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -8819,7 +8819,6 @@ // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -8864,6 +8863,7 @@ // CHECK13: omp_if.else: // CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: br label [[OMP_IF_END]] @@ -9172,7 +9172,6 @@ // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -9208,6 +9207,7 @@ // CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -9331,7 +9331,6 @@ // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -9376,6 +9375,7 @@ // CHECK13: omp_if.else: // CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: br label [[OMP_IF_END]] @@ -9653,7 +9653,6 @@ // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -9689,6 +9688,7 @@ // CHECK14-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -9997,7 +9997,6 @@ // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -10033,6 +10032,7 @@ // CHECK14-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -10156,7 +10156,6 @@ // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -10201,6 +10200,7 @@ // CHECK14: omp_if.else: // CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: br label [[OMP_IF_END]] @@ -10509,7 +10509,6 @@ // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -10545,6 +10544,7 @@ // CHECK14-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -10668,7 +10668,6 @@ // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -10713,6 +10712,7 @@ // CHECK14: omp_if.else: // CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: br label [[OMP_IF_END]] @@ -10808,4 +10808,3 @@ // CHECK14-NEXT: call void @__tgt_register_requires(i64 1) // CHECK14-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp @@ -177,23 +177,23 @@ // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !11 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -254,23 +254,23 @@ // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -305,7 +305,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -330,26 +329,27 @@ // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !20 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !20 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !20 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !20 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -410,24 +410,24 @@ // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z9gtid_testv() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !23 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -548,23 +548,23 @@ // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !26 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -625,24 +625,24 @@ // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn4v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: call void @_Z3fn4v(), !llvm.access.group !29 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -677,7 +677,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -702,26 +701,27 @@ // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !32 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !32 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !32 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !32 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -782,24 +782,24 @@ // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn5v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: call void @_Z3fn5v(), !llvm.access.group !35 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -850,7 +850,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -877,35 +876,36 @@ // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !38 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !38 // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !38 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !38 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !38 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !38 // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -966,24 +966,24 @@ // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !41 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn6v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: call void @_Z3fn6v(), !llvm.access.group !41 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1102,23 +1102,23 @@ // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !44 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !44 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !44 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1179,24 +1179,24 @@ // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn1v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !47 +// CHECK1-NEXT: call void @_Z3fn1v(), !llvm.access.group !47 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1231,7 +1231,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -1256,26 +1255,27 @@ // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !50 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !50 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !50 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !50 +// CHECK1-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !50 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !50 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !50 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1336,24 +1336,24 @@ // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !53 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn2v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !53 +// CHECK1-NEXT: call void @_Z3fn2v(), !llvm.access.group !53 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1404,7 +1404,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -1431,35 +1430,36 @@ // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !56 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !56 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !56 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !56 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !56 // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !56 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !56 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !56 +// CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !56 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !56 // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !56 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1520,24 +1520,24 @@ // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !59 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn3v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !59 +// CHECK1-NEXT: call void @_Z3fn3v(), !llvm.access.group !59 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1626,23 +1626,23 @@ // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !11 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1703,23 +1703,23 @@ // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1754,7 +1754,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -1779,26 +1778,27 @@ // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !20 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !20 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !20 +// CHECK2-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !20 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !20 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1859,24 +1859,24 @@ // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z9gtid_testv() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 +// CHECK2-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !23 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1997,23 +1997,23 @@ // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !26 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2074,24 +2074,24 @@ // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn4v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !29 +// CHECK2-NEXT: call void @_Z3fn4v(), !llvm.access.group !29 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2126,7 +2126,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -2151,26 +2150,27 @@ // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !32 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !32 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !32 +// CHECK2-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !32 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !32 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2231,24 +2231,24 @@ // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn5v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !35 +// CHECK2-NEXT: call void @_Z3fn5v(), !llvm.access.group !35 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2299,7 +2299,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -2326,35 +2325,36 @@ // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !38 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !38 // CHECK2-NEXT: br label [[OMP_IF_END:%.*]] // CHECK2: omp_if.else: -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !38 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !38 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !38 +// CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !38 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !38 // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2415,24 +2415,24 @@ // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !41 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn6v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !41 +// CHECK2-NEXT: call void @_Z3fn6v(), !llvm.access.group !41 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2551,23 +2551,23 @@ // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !44 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !44 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !44 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2628,24 +2628,24 @@ // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn1v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !47 +// CHECK2-NEXT: call void @_Z3fn1v(), !llvm.access.group !47 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2680,7 +2680,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -2705,26 +2704,27 @@ // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !50 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !50 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !50 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !50 +// CHECK2-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !50 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !50 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !50 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2785,24 +2785,24 @@ // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !53 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn2v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !53 +// CHECK2-NEXT: call void @_Z3fn2v(), !llvm.access.group !53 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2853,7 +2853,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -2880,35 +2879,36 @@ // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !56 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !56 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !56 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !56 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !56 // CHECK2-NEXT: br label [[OMP_IF_END:%.*]] // CHECK2: omp_if.else: -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !56 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !56 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !56 +// CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !56 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !56 // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !56 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2969,24 +2969,24 @@ // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !59 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn3v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !59 +// CHECK2-NEXT: call void @_Z3fn3v(), !llvm.access.group !59 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -3075,23 +3075,23 @@ // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3152,23 +3152,23 @@ // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3203,7 +3203,6 @@ // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -3228,26 +3227,27 @@ // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !20 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !20 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !20 +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !20 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3308,24 +3308,24 @@ // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z9gtid_testv() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !23 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3446,23 +3446,23 @@ // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !26 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3523,24 +3523,24 @@ // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn4v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: call void @_Z3fn4v(), !llvm.access.group !29 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3575,7 +3575,6 @@ // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -3611,6 +3610,7 @@ // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -3619,7 +3619,7 @@ // CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3697,7 +3697,7 @@ // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3751,8 +3751,6 @@ // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED11:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -3783,41 +3781,42 @@ // CHECK3: omp_if.then: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 // CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK3-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !35 // CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK3-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1 -// CHECK3-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK3-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !35 // CHECK3-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then5: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group !35 // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !35 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !35 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group !35 +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !35 // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END22:%.*]] // CHECK3: omp_if.else6: @@ -3847,6 +3846,7 @@ // CHECK3: omp_if.else16: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK3-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4 // CHECK3-NEXT: call void @.omp_outlined..10(i32* [[TMP27]], i32* [[DOTBOUND_ZERO_ADDR17]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK3-NEXT: br label [[OMP_IF_END18]] @@ -3857,7 +3857,7 @@ // CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK3-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK3: omp.inner.for.end21: // CHECK3-NEXT: br label [[OMP_IF_END22]] // CHECK3: omp_if.end22: @@ -3927,24 +3927,24 @@ // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn6v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: call void @_Z3fn6v(), !llvm.access.group !39 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: @@ -3983,7 +3983,7 @@ // CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK3-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK3: omp.inner.for.end18: // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: @@ -4055,24 +4055,24 @@ // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 // CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn6v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !43 +// CHECK3-NEXT: call void @_Z3fn6v(), !llvm.access.group !43 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: @@ -4111,7 +4111,7 @@ // CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK3-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK3: omp.inner.for.end18: // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: @@ -4234,23 +4234,23 @@ // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !47 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !47 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !47 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -4311,24 +4311,24 @@ // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !50 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn1v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: call void @_Z3fn1v(), !llvm.access.group !50 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -4363,7 +4363,6 @@ // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -4399,6 +4398,7 @@ // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: call void @.omp_outlined..14(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -4407,7 +4407,7 @@ // CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -4485,7 +4485,7 @@ // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -4536,7 +4536,6 @@ // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -4563,35 +4562,36 @@ // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !55 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !55 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !55 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !55 // CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !55 // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: call void @.omp_outlined..16(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !55 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !55 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !55 +// CHECK3-NEXT: call void @.omp_outlined..16(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !55 +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !55 // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !55 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -4652,24 +4652,24 @@ // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !58 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn3v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !58 +// CHECK3-NEXT: call void @_Z3fn3v(), !llvm.access.group !58 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -4758,23 +4758,23 @@ // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !11 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -4835,23 +4835,23 @@ // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -4886,7 +4886,6 @@ // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -4911,26 +4910,27 @@ // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !20 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !20 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !20 +// CHECK4-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !20 +// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !20 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -4991,24 +4991,24 @@ // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z9gtid_testv() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 +// CHECK4-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !23 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -5129,23 +5129,23 @@ // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !26 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -5206,24 +5206,24 @@ // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn4v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !29 +// CHECK4-NEXT: call void @_Z3fn4v(), !llvm.access.group !29 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -5258,7 +5258,6 @@ // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -5294,6 +5293,7 @@ // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -5302,7 +5302,7 @@ // CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -5380,7 +5380,7 @@ // CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -5434,8 +5434,6 @@ // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTCAPTURE_EXPR__CASTED11:%.*]] = alloca i64, align 8 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK4-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -5466,41 +5464,42 @@ // CHECK4: omp_if.then: // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 // CHECK4-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK4-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK4-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !35 // CHECK4-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK4-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK4-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK4-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1 -// CHECK4-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK4-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group !35 +// CHECK4-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !35 +// CHECK4-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !35 // CHECK4-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then5: -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group !35 // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] // CHECK4: omp_if.else: -// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]] -// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !35 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !35 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !35 +// CHECK4-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group !35 +// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !35 // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_IF_END22:%.*]] // CHECK4: omp_if.else6: @@ -5530,6 +5529,7 @@ // CHECK4: omp_if.else16: // CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK4-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4 // CHECK4-NEXT: call void @.omp_outlined..10(i32* [[TMP27]], i32* [[DOTBOUND_ZERO_ADDR17]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) #[[ATTR2]] // CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK4-NEXT: br label [[OMP_IF_END18]] @@ -5540,7 +5540,7 @@ // CHECK4-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK4-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK4: omp.inner.for.end21: // CHECK4-NEXT: br label [[OMP_IF_END22]] // CHECK4: omp_if.end22: @@ -5610,24 +5610,24 @@ // CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn6v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 +// CHECK4-NEXT: call void @_Z3fn6v(), !llvm.access.group !39 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] // CHECK4: omp_if.else: @@ -5666,7 +5666,7 @@ // CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK4-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK4: omp.inner.for.end18: // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: @@ -5738,24 +5738,24 @@ // CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 // CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn6v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !43 +// CHECK4-NEXT: call void @_Z3fn6v(), !llvm.access.group !43 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] // CHECK4: omp_if.else: @@ -5794,7 +5794,7 @@ // CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK4-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK4: omp.inner.for.end18: // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: @@ -5917,23 +5917,23 @@ // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !47 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !47 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !47 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -5994,24 +5994,24 @@ // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !50 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn1v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !50 +// CHECK4-NEXT: call void @_Z3fn1v(), !llvm.access.group !50 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -6046,7 +6046,6 @@ // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -6082,6 +6081,7 @@ // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: call void @.omp_outlined..14(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -6090,7 +6090,7 @@ // CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -6168,7 +6168,7 @@ // CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -6219,7 +6219,6 @@ // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK4-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -6246,35 +6245,36 @@ // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !55 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !55 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !55 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !55 // CHECK4-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then: -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !55 // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] // CHECK4: omp_if.else: -// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: call void @.omp_outlined..16(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !55 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !55 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !55 +// CHECK4-NEXT: call void @.omp_outlined..16(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !55 +// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !55 // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !55 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -6335,24 +6335,24 @@ // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !58 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn3v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !58 +// CHECK4-NEXT: call void @_Z3fn3v(), !llvm.access.group !58 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -7623,23 +7623,23 @@ // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !15 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -7700,23 +7700,23 @@ // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -7751,7 +7751,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -7776,26 +7775,27 @@ // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !24 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !24 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -7856,24 +7856,24 @@ // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z9gtid_testv() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !27 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -7994,23 +7994,23 @@ // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !30 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8071,24 +8071,24 @@ // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn4v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: call void @_Z3fn4v(), !llvm.access.group !33 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8123,7 +8123,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -8148,26 +8147,27 @@ // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !36 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !36 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8228,24 +8228,24 @@ // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn5v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 +// CHECK9-NEXT: call void @_Z3fn5v(), !llvm.access.group !39 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8296,7 +8296,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -8323,35 +8322,36 @@ // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !42 // CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !42 // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !42 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !42 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !42 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !42 // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8412,24 +8412,24 @@ // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn6v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: call void @_Z3fn6v(), !llvm.access.group !45 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8548,23 +8548,23 @@ // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !48 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !48 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !48 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8625,24 +8625,24 @@ // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !51 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn1v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !51 +// CHECK9-NEXT: call void @_Z3fn1v(), !llvm.access.group !51 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8677,7 +8677,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -8702,26 +8701,27 @@ // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !54 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !54 +// CHECK9-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !54 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !54 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8782,24 +8782,24 @@ // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !57 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn2v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !57 +// CHECK9-NEXT: call void @_Z3fn2v(), !llvm.access.group !57 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8850,7 +8850,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -8877,35 +8876,36 @@ // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !60 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !60 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !60 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !60 // CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !60 // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !60 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !60 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !60 +// CHECK9-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !60 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !60 // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !60 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP61:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8966,24 +8966,24 @@ // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !63 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn3v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !63 +// CHECK9-NEXT: call void @_Z3fn3v(), !llvm.access.group !63 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP64:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -9072,23 +9072,23 @@ // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !15 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9149,23 +9149,23 @@ // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9200,7 +9200,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -9225,26 +9224,27 @@ // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !24 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !24 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9305,24 +9305,24 @@ // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z9gtid_testv() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 +// CHECK10-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !27 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9443,23 +9443,23 @@ // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !30 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9520,24 +9520,24 @@ // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z3fn4v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 +// CHECK10-NEXT: call void @_Z3fn4v(), !llvm.access.group !33 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9572,7 +9572,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -9597,26 +9596,27 @@ // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !36 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !36 +// CHECK10-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !36 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9677,24 +9677,24 @@ // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z3fn5v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 +// CHECK10-NEXT: call void @_Z3fn5v(), !llvm.access.group !39 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9745,7 +9745,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -9772,35 +9771,36 @@ // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !42 // CHECK10-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !42 // CHECK10-NEXT: br label [[OMP_IF_END:%.*]] // CHECK10: omp_if.else: -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !42 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !42 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !42 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !42 // CHECK10-NEXT: br label [[OMP_IF_END]] // CHECK10: omp_if.end: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9861,24 +9861,24 @@ // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z3fn6v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 +// CHECK10-NEXT: call void @_Z3fn6v(), !llvm.access.group !45 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9997,23 +9997,23 @@ // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !48 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !48 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !48 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -10074,24 +10074,24 @@ // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !51 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z3fn1v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !51 +// CHECK10-NEXT: call void @_Z3fn1v(), !llvm.access.group !51 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -10126,7 +10126,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -10151,26 +10150,27 @@ // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !54 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !54 +// CHECK10-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !54 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !54 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -10231,24 +10231,24 @@ // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !57 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z3fn2v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !57 +// CHECK10-NEXT: call void @_Z3fn2v(), !llvm.access.group !57 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -10299,7 +10299,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -10326,35 +10325,36 @@ // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !60 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !60 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !60 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !60 // CHECK10-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !60 // CHECK10-NEXT: br label [[OMP_IF_END:%.*]] // CHECK10: omp_if.else: -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !60 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !60 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !60 +// CHECK10-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !60 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !60 // CHECK10-NEXT: br label [[OMP_IF_END]] // CHECK10: omp_if.end: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !60 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP61:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -10415,24 +10415,24 @@ // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !63 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z3fn3v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !63 +// CHECK10-NEXT: call void @_Z3fn3v(), !llvm.access.group !63 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP64:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -10521,23 +10521,23 @@ // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !15 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -10598,23 +10598,23 @@ // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -10649,7 +10649,6 @@ // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -10674,26 +10673,27 @@ // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !24 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !24 +// CHECK11-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !24 +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -10754,24 +10754,24 @@ // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z9gtid_testv() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 +// CHECK11-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !27 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -10892,23 +10892,23 @@ // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !30 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -10969,24 +10969,24 @@ // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn4v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: call void @_Z3fn4v(), !llvm.access.group !33 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -11021,7 +11021,6 @@ // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -11057,6 +11056,7 @@ // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -11065,7 +11065,7 @@ // CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -11143,7 +11143,7 @@ // CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -11197,8 +11197,6 @@ // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED11:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -11229,41 +11227,42 @@ // CHECK11: omp_if.then: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !39 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !39 // CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !39 // CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK11-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !39 // CHECK11-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK11-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK11-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1 -// CHECK11-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK11-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !39 // CHECK11-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then5: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group !39 // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !39 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !39 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group !39 +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !39 // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !39 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END22:%.*]] // CHECK11: omp_if.else6: @@ -11293,6 +11292,7 @@ // CHECK11: omp_if.else16: // CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK11-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4 // CHECK11-NEXT: call void @.omp_outlined..10(i32* [[TMP27]], i32* [[DOTBOUND_ZERO_ADDR17]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) #[[ATTR2]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK11-NEXT: br label [[OMP_IF_END18]] @@ -11303,7 +11303,7 @@ // CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK11-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK11: omp.inner.for.end21: // CHECK11-NEXT: br label [[OMP_IF_END22]] // CHECK11: omp_if.end22: @@ -11373,24 +11373,24 @@ // CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 // CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn6v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: call void @_Z3fn6v(), !llvm.access.group !43 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: @@ -11429,7 +11429,7 @@ // CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK11-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK11: omp.inner.for.end18: // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: @@ -11501,24 +11501,24 @@ // CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 // CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn6v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !47 +// CHECK11-NEXT: call void @_Z3fn6v(), !llvm.access.group !47 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: @@ -11557,7 +11557,7 @@ // CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK11-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK11: omp.inner.for.end18: // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: @@ -11680,23 +11680,23 @@ // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !51 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !51 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !51 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !51 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !51 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -11757,24 +11757,24 @@ // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !54 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn1v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !54 +// CHECK11-NEXT: call void @_Z3fn1v(), !llvm.access.group !54 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -11809,7 +11809,6 @@ // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -11845,6 +11844,7 @@ // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: call void @.omp_outlined..14(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -11853,7 +11853,7 @@ // CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -11931,7 +11931,7 @@ // CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -11982,7 +11982,6 @@ // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -12009,35 +12008,36 @@ // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !59 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !59 // CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !59 // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: call void @.omp_outlined..16(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !59 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !59 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !59 +// CHECK11-NEXT: call void @.omp_outlined..16(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !59 +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !59 // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !59 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -12098,24 +12098,24 @@ // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !62 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn3v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !62 +// CHECK11-NEXT: call void @_Z3fn3v(), !llvm.access.group !62 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -12204,23 +12204,23 @@ // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !15 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -12281,23 +12281,23 @@ // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -12332,7 +12332,6 @@ // CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -12357,26 +12356,27 @@ // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK12-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !24 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !24 +// CHECK12-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !24 +// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -12437,24 +12437,24 @@ // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z9gtid_testv() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 +// CHECK12-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !27 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -12575,23 +12575,23 @@ // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !30 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -12652,24 +12652,24 @@ // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn4v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 +// CHECK12-NEXT: call void @_Z3fn4v(), !llvm.access.group !33 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -12704,7 +12704,6 @@ // CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -12740,6 +12739,7 @@ // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -12748,7 +12748,7 @@ // CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -12826,7 +12826,7 @@ // CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -12880,8 +12880,6 @@ // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTCAPTURE_EXPR__CASTED11:%.*]] = alloca i64, align 8 // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK12-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -12912,41 +12910,42 @@ // CHECK12: omp_if.then: // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !39 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !39 // CHECK12-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !39 // CHECK12-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK12-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !39 // CHECK12-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK12-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK12-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK12-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1 -// CHECK12-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK12-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group !39 +// CHECK12-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !39 +// CHECK12-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !39 // CHECK12-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK12: omp_if.then5: -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group !39 // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] // CHECK12: omp_if.else: -// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK12-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK12-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]] -// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !39 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !39 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !39 +// CHECK12-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group !39 +// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !39 // CHECK12-NEXT: br label [[OMP_IF_END]] // CHECK12: omp_if.end: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !39 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_IF_END22:%.*]] // CHECK12: omp_if.else6: @@ -12976,6 +12975,7 @@ // CHECK12: omp_if.else16: // CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK12-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4 // CHECK12-NEXT: call void @.omp_outlined..10(i32* [[TMP27]], i32* [[DOTBOUND_ZERO_ADDR17]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) #[[ATTR2]] // CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK12-NEXT: br label [[OMP_IF_END18]] @@ -12986,7 +12986,7 @@ // CHECK12-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK12-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK12: omp.inner.for.end21: // CHECK12-NEXT: br label [[OMP_IF_END22]] // CHECK12: omp_if.end22: @@ -13056,24 +13056,24 @@ // CHECK12-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 // CHECK12-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK12-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn6v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: call void @_Z3fn6v(), !llvm.access.group !43 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] // CHECK12: omp_if.else: @@ -13112,7 +13112,7 @@ // CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK12-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK12: omp.inner.for.end18: // CHECK12-NEXT: br label [[OMP_IF_END]] // CHECK12: omp_if.end: @@ -13184,24 +13184,24 @@ // CHECK12-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 // CHECK12-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK12-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn6v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !47 +// CHECK12-NEXT: call void @_Z3fn6v(), !llvm.access.group !47 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] // CHECK12: omp_if.else: @@ -13240,7 +13240,7 @@ // CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK12-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK12: omp.inner.for.end18: // CHECK12-NEXT: br label [[OMP_IF_END]] // CHECK12: omp_if.end: @@ -13363,23 +13363,23 @@ // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !51 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !51 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !51 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !51 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !51 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -13440,24 +13440,24 @@ // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !54 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn1v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !54 +// CHECK12-NEXT: call void @_Z3fn1v(), !llvm.access.group !54 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -13492,7 +13492,6 @@ // CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -13528,6 +13527,7 @@ // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: call void @.omp_outlined..14(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -13536,7 +13536,7 @@ // CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -13614,7 +13614,7 @@ // CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -13665,7 +13665,6 @@ // CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK12-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -13692,35 +13691,36 @@ // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !59 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !59 // CHECK12-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK12: omp_if.then: -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !59 // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] // CHECK12: omp_if.else: -// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK12-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK12-NEXT: call void @.omp_outlined..16(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !59 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !59 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !59 +// CHECK12-NEXT: call void @.omp_outlined..16(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !59 +// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !59 // CHECK12-NEXT: br label [[OMP_IF_END]] // CHECK12: omp_if.end: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !59 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -13781,24 +13781,24 @@ // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !62 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn3v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !62 +// CHECK12-NEXT: call void @_Z3fn3v(), !llvm.access.group !62 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: diff --git a/clang/test/OpenMP/vla_crash.c b/clang/test/OpenMP/vla_crash.c --- a/clang/test/OpenMP/vla_crash.c +++ b/clang/test/OpenMP/vla_crash.c @@ -29,7 +29,6 @@ // CHECK1-NEXT: [[C:%.*]] = alloca i32***, align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* @a, align 4 // CHECK1-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 @@ -37,6 +36,7 @@ // CHECK1-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP2]], i32** [[B]], i64 [[TMP4]], i32**** [[C]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: ret void @@ -90,7 +90,6 @@ // CHECK1-NEXT: [[P:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 @@ -100,6 +99,7 @@ // CHECK1-NEXT: store i32* [[TMP3]], i32** [[P]], align 8 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP2]], i32** [[P]], i32** [[A_ADDR]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: ret void @@ -130,4 +130,3 @@ // CHECK1: if.end: // CHECK1-NEXT: ret void // -// \ No newline at end of file